9 年之前 · 1fafa427d6
--- a/tripal_chado/includes/loaders/tripal_chado.fasta_loader.inc
+++ b/tripal_chado/includes/loaders/tripal_chado.fasta_loader.inc
@@ -1,1004 +0,0 @@
 
				-<?php
			
 
				-/**
			
 
				- * @file
			
 
				- * Provides fasta loading functionality. Creates features based on their specification
			
 
				- * in a fasta file.
			
 
				- */
			
 
				-
			
 
				-/**
			
 
				- * @defgroup fasta_loader FASTA Feature Loader
			
 
				- * @ingroup tripal_chado
			
 
				- * @{
			
 
				- * Provides fasta loading functionality.
			
 
				- * Creates features based on their specification
			
 
				- * in a fasta file.
			
 
				- * @}
			
 
				- */
			
 
				-
			
 
				/**
 * Form builder: collects the settings needed to queue a FASTA loading job.
 *
 * The form gathers the FASTA file path, the organism, the Sequence Ontology
 * type of the sequences, the insert/update method, the name match type, the
 * analysis, and a set of optional "advanced" settings (regular expressions
 * for the name / unique name, an external database cross reference and a
 * relationship to already loaded features).
 *
 * The select lists for organisms, analyses and databases are populated from
 * the Chado {organism}, {analysis} and {db} tables.
 *
 * @return
 *   The form array.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_fasta_load_form() {
  $form = array();

  $form['fasta_file'] = array(
    '#type' => 'textfield',
    '#title' => t('FASTA File'),
    '#description' => t('Please enter the full system path for the FASTA file, or a path within the Drupal
                           installation (e.g. /sites/default/files/xyz.fasta).  The path must be accessible to the
                           server on which this Drupal instance is running.'),
    '#required' => TRUE,
  );

  // Get the list of organisms. The empty first option forces an explicit
  // choice because the element is required.
  $organisms = array('' => '');
  $org_rset = chado_query("SELECT * FROM {organism} ORDER BY genus, species");
  while ($organism = $org_rset->fetchObject()) {
    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  }
  $form['organism_id'] = array(
    '#title' => t('Organism'),
    // The element type is a machine name and must never be translated.
    '#type' => 'select',
    '#description' => t("Choose the organism to which these sequences are associated"),
    '#required' => TRUE,
    '#options' => $organisms,
  );

  $form['seqtype'] = array(
    '#type' => 'textfield',
    '#title' => t('Sequence Type'),
    '#required' => TRUE,
    '#description' => t('Please enter the Sequence Ontology (SO) term name that describes the sequences in the FASTA file (e.g. gene, mRNA, polypeptide, etc...)'),
  );

  // The autocomplete path needs the cv_id of the sequence ontology. Only
  // attach it when that vocabulary is actually present; otherwise the field
  // stays a plain textfield instead of pointing at a broken path.
  $cv = chado_select_record('cv', array('cv_id'), array('name' => 'sequence'));
  if (!empty($cv)) {
    $cv_id = $cv[0]->cv_id;
    $form['seqtype']['#autocomplete_path'] = "admin/tripal/storage/chado/auto_name/cvterm/$cv_id";
  }

  // The option keys (0, 1, 2) are what the validate and submit handlers map
  // back to 'Insert only', 'Update only' and 'Insert and update'.
  $form['method'] = array(
    '#type' => 'radios',
    '#title' => t('Method'),
    '#required' => TRUE,
    '#options' => array(
      t('Insert only'),
      t('Update only'),
      t('Insert and update'),
    ),
    '#description' => t('Select how features in the FASTA file are handled.
       Select "Insert only" to insert the new features. If a feature already
       exists with the same name or unique name and type then it is skipped.
       Select "Update only" to only update features that already exist in the
       database.  Select "Insert and Update" to insert features that do
       not exist and update those that do.'),
    '#default_value' => 2,
  );

  // The option keys (0, 1) are mapped back to 'Name' and 'Unique name' by the
  // validate and submit handlers.
  $form['match_type'] = array(
    '#type' => 'radios',
    '#title' => t('Name Match Type'),
    '#required' => TRUE,
    '#options' => array(
      t('Name'),
      t('Unique name'),
    ),
    '#description' => t('Used for "updates only" or "insert and update" methods. Not required if method type is "insert".
      Feature data is stored in Chado with both a human-readable
      name and a unique name. If the features in your FASTA file are uniquely identified using
      a human-readable name then select the "Name" button. If your features are
      uniquely identified using the unique name then select the "Unique name" button.  If you
      loaded your features first using the GFF loader then the unique name of each
      features were indicated by the "ID=" attribute and the name by the "Name=" attribute.
      By default, the FASTA loader will use the first word (character string
      before the first space) as  the name for your feature. If
      this does not uniquely identify your feature consider specifying a regular expression in the advanced section below.
      Additionally, you may import both a name and a unique name for each sequence using the advanced options.'),
    '#default_value' => 1,
  );

  $form['analysis'] = array(
    '#type' => 'fieldset',
    '#title' => t('Analysis Used to Derive Features'),
    '#collapsed' => TRUE,
  );
  $form['analysis']['desc'] = array(
    '#markup' => t("Why specify an analysis for a data load?  All data comes
       from some place, even if downloaded from Genbank. By specifying
       analysis details for all data uploads, it allows an end user to reproduce the
       data set, but at least indicates the source of the data."),
  );

  // Get the list of analyses.
  $analyses = array('' => '');
  $analysis_rset = chado_query("SELECT * FROM {analysis} ORDER BY name");
  while ($analysis = $analysis_rset->fetchObject()) {
    $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
  }
  $form['analysis']['analysis_id'] = array(
    '#title' => t('Analysis'),
    '#type' => 'select',
    '#description' => t("Choose the analysis to which these features are associated"),
    '#required' => TRUE,
    '#options' => $analyses,
  );

  // Advanced options.
  $form['advanced'] = array(
    '#type' => 'fieldset',
    '#title' => t('Advanced Options'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );
  // Drupal 7 'item' elements render '#markup'; the Drupal 6 style '#value'
  // is ignored, which would leave this help text invisible.
  $form['advanced']['re_help'] = array(
    '#type' => 'item',
    '#markup' => t('A regular expression is an advanced method for extracting information from a string of text.
                   Your FASTA file may contain both a human-readable name and a unique name for each sequence.
                   If you want to import
                   both the name and unique name for all sequences, then you must provide regular expressions
                   so that the loader knows how to separate them.
                   Otherwise the name and uniquename will be the same.
                   By default, this loader will use the first word in the definition
                   lines of the FASTA file
                   as the name or unique name of the feature.'),
  );
  $form['advanced']['re_name'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the name'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the
       feature name from the FASTA definition line. For example, for a
       definition line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
       the regular expression for the name would be, "^(.*?)\|.*$".  All FASTA
       definition lines begin with the ">" symbol.  You do not need to include
       this symbol in your regular expression.'),
  );
  $form['advanced']['re_uname'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the unique name'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the
       feature unique name from the FASTA definition line. For example, for a
       definition line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
       the regular expression for the unique name would be "^.*?\|(.*)$".  All FASTA
       definition lines begin with the ">" symbol.  You do not need to include
       this symbol in your regular expression.'),
  );

  // Advanced database cross reference options.
  $form['advanced']['db'] = array(
    '#type' => 'fieldset',
    '#title' => t('External Database Reference'),
    '#weight' => 6,
    '#collapsed' => TRUE,
  );
  $form['advanced']['db']['re_accession'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the accession'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the accession for the external database for each feature from the FASTA definition line.'),
    '#weight' => 2,
  );

  // Get the list of databases.
  $dbs = array('' => '');
  $db_rset = chado_query("SELECT * FROM {db} ORDER BY name");
  while ($db = $db_rset->fetchObject()) {
    $dbs[$db->db_id] = "$db->name";
  }
  $form['advanced']['db']['db_id'] = array(
    '#title' => t('External Database'),
    '#type' => 'select',
    '#description' => t("Please choose an external database for which these sequences have a cross reference."),
    '#required' => FALSE,
    '#options' => $dbs,
    '#weight' => 1,
  );

  // Advanced relationship options.
  $form['advanced']['relationship'] = array(
    '#type' => 'fieldset',
    '#title' => t('Relationships'),
    '#weight' => 6,
    '#collapsed' => TRUE,
  );
  $rels = array(
    '' => '',
    'part_of' => 'part of',
    'derives_from' => 'produced by (derives from)',
  );
  $form['advanced']['relationship']['rel_type'] = array(
    '#title' => t('Relationship Type'),
    '#type' => 'select',
    '#description' => t("Use this option to create associations, or relationships between the
                        features of this FASTA file and existing features in the database. For
                        example, to associate a FASTA file of peptides to existing genes or transcript sequence,
                        select the type 'produced by'. For a CDS sequences select the type 'part of'"),
    '#required' => FALSE,
    '#options' => $rels,
    '#weight' => 5,
  );
  $form['advanced']['relationship']['re_subject'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the parent'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the unique
                         name needed to identify the existing sequence for which the
                         relationship type selected above will apply.'),
    '#weight' => 6,
  );
  $form['advanced']['relationship']['parent_type'] = array(
    '#type' => 'textfield',
    '#title' => t('Parent Type'),
    '#required' => FALSE,
    '#description' => t('Please enter the Sequence Ontology term for the parent.  For example
                         if the FASTA file being loaded is a set of proteins that are
                         products of genes, then use the SO term \'gene\' or \'transcript\' or equivalent. However,
                         this type must match the type for already loaded features.'),
    '#weight' => 7,
  );

  $form['button'] = array(
    '#type' => 'submit',
    '#value' => t('Import FASTA file'),
    '#weight' => 10,
  );

  return $form;
}
			
 
				-
			
 
				/**
 * Form validation handler for tripal_feature_fasta_load_form().
 *
 * Checks that:
 *  - a regular expression is supplied for whichever of name / unique name is
 *    used for matching when only the other one was given,
 *  - every supplied regular expression compiles when wrapped in "/" delimiters
 *    (which is how the loader uses them),
 *  - the FASTA file exists, either relative to the Drupal installation or as
 *    an absolute path,
 *  - the relationship settings and the database cross reference settings are
 *    either completely filled in or completely empty,
 *  - the sequence type (and the parent type, when a relationship is requested)
 *    is a term or synonym in the 'sequence' vocabulary.
 *
 * The resolved file path is stored in $form_state['storage']['dfile'] for the
 * submit handler.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state array; 'values' is read and 'storage' is written.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_fasta_load_form_validate($form, &$form_state) {
  $values = $form_state['values'];

  $fasta_file = trim($values['fasta_file']);
  $type = trim($values['seqtype']);
  $match_type = trim($values['match_type']);
  $re_name = trim($values['re_name']);
  $re_uname = trim($values['re_uname']);
  $re_accession = trim($values['re_accession']);
  $db_id = $values['db_id'];
  $rel_type = $values['rel_type'];
  $re_subject = trim($values['re_subject']);
  $parent_type = trim($values['parent_type']);

  // Translate the radio key (0 or 1) into the label the loader works with.
  $match_labels = array('Name', 'Unique name');
  if (isset($match_labels[$match_type])) {
    $match_type = $match_labels[$match_type];
  }

  if ($re_name and !$re_uname and strcmp($match_type, 'Unique name') == 0) {
    form_set_error('re_uname', t("You must provide a regular expression to identify the sequence unique name"));
  }

  if (!$re_name and $re_uname and strcmp($match_type, 'Name') == 0) {
    form_set_error('re_name', t("You must provide a regular expression to identify the sequence name"));
  }

  // The loader embeds each expression between "/" delimiters. Reject anything
  // that does not compile that way now rather than failing part-way through
  // the import job. The "@" only silences the compile warning; the FALSE
  // return value is what signals the problem.
  $regexes = array(
    're_name' => $re_name,
    're_uname' => $re_uname,
    're_accession' => $re_accession,
    're_subject' => $re_subject,
  );
  foreach ($regexes as $element => $regex) {
    if ($regex !== '' and @preg_match("/$regex/", '') === FALSE) {
      form_set_error($element, t("The regular expression is not valid. Note that any forward slash '/' must be escaped with a backslash."));
    }
  }

  // Check to see if the file is located local to Drupal. If it is not then
  // the file must be someplace else, so use the path exactly as provided.
  $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $fasta_file;
  if (!file_exists($dfile)) {
    $dfile = $fasta_file;
  }
  if (!file_exists($dfile)) {
    form_set_error('fasta_file', t("Cannot find the file on the system. Check that the file exists or that the web server has permissions to read the file."));
  }

  // Make sure if a relationship is specified that all fields are provided.
  if (($rel_type or $parent_type) and !$re_subject) {
    form_set_error('re_subject', t("Please provide a regular expression for the parent"));
  }
  if (($rel_type or $re_subject) and !$parent_type) {
    form_set_error('parent_type', t("Please provide a SO term for the parent"));
  }
  if (($parent_type or $re_subject) and !$rel_type) {
    form_set_error('rel_type', t("Please select a relationship type"));
  }

  // Make sure if a database is specified that all fields are provided.
  if ($db_id and !$re_accession) {
    form_set_error('re_accession', t("Please provide a regular expression for the accession"));
  }
  if ($re_accession and !$db_id) {
    form_set_error('db_id', t("Please select a database"));
  }

  // Check to make sure the types exist, either as a term name or as a synonym
  // within the 'sequence' vocabulary.
  $cvtermsql = "SELECT CVT.cvterm_id
               FROM {cvterm} CVT
                  INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
                  LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
               WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)";
  $cvterm = chado_query($cvtermsql, array(
    ':cvname' => 'sequence',
    ':name' => $type,
    ':synonym' => $type,
  ))->fetchObject();
  if (!$cvterm) {
    // The form element is named 'seqtype'; flagging 'type' would leave the
    // offending field without an error highlight.
    form_set_error('seqtype', t("The Sequence Ontology (SO) term selected for the sequence type is not available in the database. Please check spelling or select another."));
  }

  // An empty parent type has already been reported above, so only look the
  // term up when there is something to look up.
  if ($rel_type and $parent_type) {
    $parentcvterm = chado_query($cvtermsql, array(
      ':cvname' => 'sequence',
      ':name' => $parent_type,
      ':synonym' => $parent_type,
    ))->fetchObject();
    if (!$parentcvterm) {
      form_set_error('parent_type', t("The Sequence Ontology (SO) term selected for the parent relationship is not available in the database. Please check spelling or select another."));
    }
  }

  // Hand the resolved file path to the submit handler.
  $form_state['storage']['dfile'] = $dfile;
}
			
 
				-
			
 
				/**
 * Form submit handler for tripal_feature_fasta_load_form().
 *
 * Reads the submitted values, converts the numeric radio selections for the
 * method and the match type into their text labels and queues a Tripal job
 * that calls tripal_feature_load_fasta() with those settings.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state array. The file path is taken from
 *   $form_state['storage']['dfile'], which the validate handler sets.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_fasta_load_form_submit($form, &$form_state) {
  global $user;

  $input = $form_state['values'];
  $path = $form_state['storage']['dfile'];

  // Radio key -> label. switch() compares loosely, exactly like '=='.
  $method = trim($input['method']);
  switch ($method) {
    case 0:
      $method = 'Insert only';
      break;

    case 1:
      $method = 'Update only';
      break;

    case 2:
      $method = 'Insert and update';
      break;
  }

  $match_type = trim($input['match_type']);
  switch ($match_type) {
    case 0:
      $match_type = 'Name';
      break;

    case 1:
      $match_type = 'Unique name';
      break;
  }

  // The order of these values must match the parameter order of
  // tripal_feature_load_fasta().
  $job_args = array(
    $path,
    $input['organism_id'],
    trim($input['seqtype']),
    trim($input['re_name']),
    trim($input['re_uname']),
    trim($input['re_accession']),
    $input['db_id'],
    $input['rel_type'],
    trim($input['re_subject']),
    trim($input['parent_type']),
    $method,
    $user->uid,
    $input['analysis_id'],
    $match_type,
  );

  // Only the file name (everything after the last slash) goes in the job title.
  $short_name = preg_replace("/.*\/(.*)/", "$1", $path);

  // The job needs this include file loaded when it runs.
  $job_includes = array(
    module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.fasta_loader'),
  );

  tripal_add_job("Import FASTA file: $short_name", 'tripal_chado', 'tripal_feature_load_fasta', $job_args, $user->uid, 10, $job_includes);
}
			
 
				-
			
 
				/**
 * Actually load a FASTA file. This is the function called by Tripal jobs.
 *
 * The load runs in two steps inside a single database transaction:
 *  1. The file is scanned once. For every definition line (a line starting
 *     with ">") the name, unique name, accession and relationship subject are
 *     extracted and the byte offsets where the sequence residues start and end
 *     are recorded.
 *  2. Each recorded sequence is handed to tripal_feature_load_fasta_feature(),
 *     which reads the residues from the still-open file handle using those
 *     offsets.
 *
 * If an exception is thrown the transaction is rolled back.
 *
 * @param $dfile
 *   The full path to the FASTA file to load.
 * @param $organism_id
 *   The organism_id of the organism these features are from.
 * @param $type
 *   The type of features contained in the FASTA file (a term name or synonym
 *   in the 'sequence' vocabulary).
 * @param $re_name
 *   A regular expression (without delimiters) whose first capture group
 *   extracts the feature.name from the definition line.
 * @param $re_uname
 *   A regular expression (without delimiters) whose first capture group
 *   extracts the feature.uniquename from the definition line.
 * @param $re_accession
 *   A regular expression (without delimiters) whose first capture group
 *   extracts the accession for the database cross reference.
 * @param $db_id
 *   The db_id of the database the accession belongs to.
 * @param $rel_type
 *   The type of relationship to create between each feature and the feature
 *   identified by $re_subject.
 * @param $re_subject
 *   A regular expression (without delimiters) whose first capture group
 *   extracts the unique name of the related (parent) feature. Unlike the other
 *   expressions it is applied to the full line, including the leading ">".
 * @param $parent_type
 *   The type of the parent feature.
 * @param $method
 *   The method of feature adding: 'Insert only', 'Update only' or
 *   'Insert and update'.
 * @param $uid
 *   The user id of the user who submitted the job. Not used by this function.
 * @param $analysis_id
 *   The analysis_id to associate the features in this FASTA file with.
 * @param $match_type
 *   Whether to match existing features based on the 'Name' or 'Unique name'.
 * @param $job
 *   (optional) The Tripal job, used for progress reporting. Defaults to NULL.
 *
 * @return
 *   0 if a required term cannot be found or the file cannot be opened;
 *   otherwise nothing is returned.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_load_fasta($dfile, $organism_id, $type, $re_name, $re_uname, $re_accession,
  $db_id, $rel_type, $re_subject, $parent_type, $method, $uid, $analysis_id, $match_type,
  $job = NULL) {

  $transaction = db_transaction();
  print "\nNOTE: Loading of this FASTA file is performed using a database transaction. \n" .
     "If the load fails or is terminated prematurely then the entire set of \n" .
     "insertions/updates is rolled back and will not be found in the database\n\n";

  // Declared before the try block so the catch block can tell whether the
  // file was ever opened.
  $fh = NULL;

  try {

    // First get the type for this sequence.
    $cvtermsql = "
      SELECT CVT.cvterm_id
      FROM {cvterm} CVT
        INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
        LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
      WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
    ";
    $cvterm = chado_query($cvtermsql, array(
      ':cvname' => 'sequence',
      ':name' => $type,
      ':synonym' => $type,
    ))->fetchObject();
    if (!$cvterm) {
      tripal_report_error("T_fasta_loader", TRIPAL_ERROR,
        "Cannot find the term type: '%type'", array('%type' => $type));
      return 0;
    }

    // Second, if there is a parent type then get that.
    $parentcvterm = NULL;
    if ($parent_type) {
      $parentcvterm = chado_query($cvtermsql, array(
        ':cvname' => 'sequence',
        ':name' => $parent_type,
        ':synonym' => $parent_type,
      ))->fetchObject();
      if (!$parentcvterm) {
        // Report the name that was looked up, not the (FALSE) query result.
        tripal_report_error("T_fasta_loader", TRIPAL_ERROR,
          "Cannot find the parent term type: '%type'", array('%type' => $parent_type));
        return 0;
      }
    }

    // Third, if there is a relationship type then get that.
    $relcvterm = NULL;
    if ($rel_type) {
      $relcvterm = chado_query($cvtermsql, array(
        ':cvname' => 'sequence',
        ':name' => $rel_type,
        ':synonym' => $rel_type,
      ))->fetchObject();
      if (!$relcvterm) {
        tripal_report_error("T_fasta_loader", TRIPAL_ERROR,
          "Cannot find the relationship term type: '%type'", array('%type' => $rel_type));
        return 0;
      }
    }

    // We need to get the table schema to make sure we don't overrun the
    // size of fields with what our regular expressions retrieve.
    $feature_tbl = chado_get_schema('feature');
    $dbxref_tbl = chado_get_schema('dbxref');
    $max_name_len = $feature_tbl['fields']['name']['length'];
    $max_accession_len = $dbxref_tbl['fields']['accession']['length'];

    print "Step 1: finding sequences\n";
    $filesize = filesize($dfile);
    $fh = fopen($dfile, 'r');
    if (!$fh) {
      tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "cannot open file: %dfile", array(
        '%dfile' => $dfile,
      ));
      return 0;
    }

    // Calculate the interval (in bytes) at which we will print the status.
    $interval = intval($filesize * 0.01);
    if ($interval < 1) {
      $interval = 1;
    }
    $intv_read = 0;
    $num_read = 0;
    $line_num = 0;

    // Iterate through the lines of the file. Keep a record for
    // where in the file each sequence is at for later import.
    $seqs = array();
    $num_seqs = 0;
    $prev_pos = 0;
    // Compare against FALSE explicitly so that a final line consisting of
    // just "0" (without a newline) does not end the loop early.
    while (($line = fgets($fh)) !== FALSE) {
      $line_num++;
      $num_read += strlen($line);
      $intv_read += strlen($line);

      // If we encounter a definition line then get the name, uniquename,
      // accession and relationship subject from the definition line.
      if (preg_match('/^>/', $line)) {

        // Start every record from a clean slate so that nothing leaks in
        // from the previous definition line when an expression fails.
        $name = '';
        $uname = '';
        $accession = '';
        $subject = '';

        // Remove the > symbol from the defline.
        $defline = preg_replace("/^>/", '', $line);

        // Get the feature name if a regular expression is provided.
        if ($re_name) {
          if (!preg_match("/$re_name/", $defline, $matches) or !isset($matches[1])) {
            tripal_report_error('T_fasta_loader', TRIPAL_ERROR,
              "Regular expression for the feature name finds nothing. Line %line.",
              array('%line' => $line_num));
          }
          elseif (strlen($matches[1]) > $max_name_len) {
            tripal_report_error('T_fasta_loader', TRIPAL_WARNING,
              "Regular expression retrieves a value too long for the feature name. Line %line.",
              array('%line' => $line_num));
          }
          else {
            $name = trim($matches[1]);
          }
        }
        // If the match_type is name and no regular expression was provided
        // then use the first word as the name, otherwise we don't set the name.
        elseif (strcmp($match_type, 'Name') == 0) {
          if (preg_match("/^\s*(.*?)[\s\|].*$/", $defline, $matches)) {
            if (strlen($matches[1]) > $max_name_len) {
              tripal_report_error('T_fasta_loader', TRIPAL_WARNING,
                "Regular expression retrieves a feature name too long for the feature name. Line %line.",
                array('%line' => $line_num));
            }
            else {
              $name = trim($matches[1]);
            }
          }
          else {
            tripal_report_error('T_fasta_loader', TRIPAL_ERROR,
              "Cannot find a feature name. Line %line.", array('%line' => $line_num));
          }
        }

        // Get the feature uniquename if a regular expression is provided.
        if ($re_uname) {
          if (preg_match("/$re_uname/", $defline, $matches) and isset($matches[1])) {
            $uname = trim($matches[1]);
          }
          else {
            tripal_report_error('T_fasta_loader', TRIPAL_ERROR,
              "Regular expression for the feature unique name finds nothing. Line %line.",
              array('%line' => $line_num));
          }
        }
        // If the match_type is unique name and no regular expression was
        // provided then use the first word as the unique name, otherwise we
        // don't set the uniquename.
        elseif (strcmp($match_type, 'Unique name') == 0) {
          if (preg_match("/^\s*(.*?)[\s\|].*$/", $defline, $matches)) {
            $uname = trim($matches[1]);
          }
          else {
            tripal_report_error('T_fasta_loader', TRIPAL_ERROR,
              "Cannot find a feature unique name. Line %line.", array('%line' => $line_num));
          }
        }

        // Get the accession if a regular expression is provided.
        if ($re_accession) {
          if (preg_match("/$re_accession/", $defline, $matches) and isset($matches[1])) {
            if (strlen($matches[1]) > $max_accession_len) {
              tripal_report_error('T_fasta_loader', TRIPAL_WARNING,
                "Regular expression retrieves an accession too long for the feature name. " .
                "Cannot add cross reference. Line %line.", array('%line' => $line_num));
            }
            else {
              $accession = trim($matches[1]);
            }
          }
        }

        // Get the relationship subject if a regular expression is provided.
        // This expression is applied to the whole line (with the ">").
        if ($re_subject) {
          if (preg_match("/$re_subject/", $line, $matches) and isset($matches[1])) {
            $subject = trim($matches[1]);
          }
        }

        // Add the details to the sequence. The residues start right after
        // the definition line that was just read.
        $seqs[$num_seqs] = array(
          'name' => $name,
          'uname' => $uname,
          'accession' => $accession,
          'subject' => $subject,
          'seq_start' => ftell($fh),
        );

        // If this isn't the first sequence, then we want to specify where
        // the previous sequence ended.
        if ($num_seqs > 0) {
          $seqs[$num_seqs - 1]['seq_end'] = $prev_pos;
        }
        $num_seqs++;
      }

      // Keep the current file position so we can use it to set the sequence
      // ending position.
      $prev_pos = ftell($fh);

      // Update the job status every $interval bytes. Parsing accounts for the
      // first half of the job, so the progress is scaled to 0-50.
      if ($job and $intv_read >= $interval) {
        $intv_read = 0;
        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
        print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) .
           " bytes.\r";
        tripal_set_job_progress($job, intval(($num_read / $filesize) * 50));
      }
    }

    // An empty file has a size of 0; avoid dividing by it.
    $percent = $filesize > 0 ? sprintf("%.2f", ($num_read / $filesize) * 100) : '100.00';
    print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) .
       " bytes.\r";
    if ($job) {
      tripal_set_job_progress($job, 50);
    }

    // Set the end position for the last sequence: the end of the file.
    if ($num_seqs > 0) {
      $seqs[$num_seqs - 1]['seq_end'] = $num_read;
    }
    else {
      print "\nNo sequences were found in the file.\n";
    }

    // Now that we know where the sequences are in the file we need to add them.
    print "\nStep 2: Importing sequences\n";
    // No source is determined anywhere in this function; pass NULL explicitly
    // for the helper's $source parameter.
    $source = NULL;
    for ($i = 0; $i < $num_seqs; $i++) {
      $seq = $seqs[$i];
      print "Importing " . ($i + 1) . " of $num_seqs. ";
      if ($seq['name']) {
        print "Current feature: " . $seq['name'] . ".\n";
      }
      else {
        print "Current feature: " . $seq['uname'] . ".\n";
      }

      tripal_feature_load_fasta_feature($fh, $seq['name'], $seq['uname'], $db_id, $seq['accession'], $seq['subject'], $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm, $source, $method, $re_name, $match_type, $parentcvterm, $relcvterm, $seq['seq_start'], $seq['seq_end']);
    }
    if ($job) {
      tripal_set_job_progress($job, 100);
    }
    fclose($fh);
  }
  catch (Exception $e) {
    // Only close the file if it was actually opened.
    if (is_resource($fh)) {
      fclose($fh);
    }
    $transaction->rollback();
    // Make sure we start errors on a new line.
    print "\n";
    watchdog_exception('T_fasta_loader', $e);
    print "FAILED: Rolling back database changes...\n";
  }

  print "\nDone\n";
}
			
 
				-
			
 
				-/**
				- * A helper function for tripal_feature_load_fasta() to load a single feature.
				- *
				- * Looks up the feature by name or unique name (depending on $match_type),
				- * inserts it and/or updates its name fields according to $method, loads the
				- * residues found between $seq_start and $seq_end of the open FASTA file and
				- * then adds the analysis, database cross reference and parent relationship
				- * records that were requested.
				- *
				- * @param $fh
				- *   An open, seekable file handle for the FASTA file.
				- * @param $name
				- *   The feature name.
				- * @param $uname
				- *   The feature unique name.
				- * @param $db_id
				- *   Optional. When set, a dbxref for $accession is linked to the feature.
				- * @param $accession
				- *   The accession stored in the dbxref table when $db_id is set.
				- * @param $parent
				- *   The unique name of the parent feature. Only used when $rel_type is set.
				- * @param $rel_type
				- *   Optional. When set, a feature_relationship to the parent is added.
				- * @param $parent_type
				- *   The parent type name. Only used in an error message.
				- * @param $analysis_id
				- *   Optional. The analysis to associate with the feature.
				- * @param $organism_id
				- *   The organism the feature belongs to.
				- * @param $cvterm
				- *   An object whose cvterm_id gives the feature type.
				- * @param $source
				- *   Not used by this function.
				- * @param $method
				- *   One of 'Insert only', 'Update only' or 'Insert and update'.
				- * @param $re_name
				- *   Not used by this function.
				- * @param $match_type
				- *   Either 'Name' or 'Unique name'.
				- * @param $parentcvterm
				- *   An object whose cvterm_id gives the parent feature type.
				- * @param $relcvterm
				- *   An object whose cvterm_id gives the relationship type.
				- * @param $seq_start
				- *   The byte offset in the file where the sequence starts.
				- * @param $seq_end
				- *   The byte offset in the file where the sequence ends.
				- *
				- * @return
				- *   0 when the feature is skipped or a step fails. Nothing is returned on
				- *   success.
				- *
				- * @ingroup fasta_loader
				- */
				-function tripal_feature_load_fasta_feature($fh, $name, $uname, $db_id, $accession, $parent,
				-  $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm, $source, $method, $re_name,
				-  $match_type, $parentcvterm, $relcvterm, $seq_start, $seq_end) {
				-
				-  $match_on_name = (strcmp($match_type, 'Name') == 0);
				-  $match_on_uname = (strcmp($match_type, 'Unique name') == 0);
				-  $insert_only = (strcmp($method, 'Insert only') == 0);
				-  $can_insert = ($insert_only or strcmp($method, 'Insert and update') == 0);
				-  $can_update = (strcmp($method, 'Update only') == 0 or strcmp($method, 'Insert and update') == 0);
				-
				-  // The type label is only used in messages. Fall back to the id because
				-  // the cvterm object is only known to carry cvterm_id.
				-  $type_name = isset($cvterm->name) ? $cvterm->name : $cvterm->cvterm_id;
				-
				-  // Check to see if this feature already exists, using the field selected
				-  // by the match type.
				-  $feature = NULL;
				-  if ($match_on_name or $match_on_uname) {
				-    $values = array(
				-      'organism_id' => $organism_id,
				-      'type_id' => $cvterm->cvterm_id,
				-    );
				-    if ($match_on_name) {
				-      $values['name'] = $name;
				-    }
				-    else {
				-      $values['uniquename'] = $uname;
				-    }
				-    $results = chado_select_record('feature', array('feature_id'), $values);
				-    if (count($results) > 1) {
				-      tripal_report_error('T_fasta_loader', TRIPAL_WARNING, "Multiple features exist with the name '%name' of type '%type' for the organism.  skipping", array(
				-        '%name' => $match_on_name ? $name : $uname,
				-        '%type' => $type_name,
				-      ));
				-      return 0;
				-    }
				-    if (count($results) == 1) {
				-      $feature = $results[0];
				-    }
				-  }
				-
				-  // If the feature exists but this is an "insert only" then skip. This
				-  // applies to both match types.
				-  if ($feature and $insert_only) {
				-    tripal_report_error('T_fasta_loader', TRIPAL_WARNING, "Feature already exists '%name' ('%uname') while matching on %type. Skipping insert.", array(
				-      '%name' => $name,
				-      '%uname' => $uname,
				-      '%type' => drupal_strtolower($match_type),
				-    ));
				-    return 0;
				-  }
				-
				-  // If we don't have a feature and we're doing an insert then do the insert.
				-  $inserted = FALSE;
				-  if (!$feature and $can_insert) {
				-    // If only one of the name and unique name is available then use it for
				-    // both fields.
				-    if (!$uname) {
				-      $uname = $name;
				-    }
				-    elseif (!$name) {
				-      $name = $uname;
				-    }
				-
				-    // Insert the feature record.
				-    $values = array(
				-      'organism_id' => $organism_id,
				-      'name' => $name,
				-      'uniquename' => $uname,
				-      'type_id' => $cvterm->cvterm_id,
				-    );
				-    $success = chado_insert_record('feature', $values);
				-    if (!$success) {
				-      tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to insert feature '%name (%uname)'", array(
				-        '%name' => $name,
				-        '%uname' => $uname,
				-      ));
				-      return 0;
				-    }
				-
				-    // Now get the feature we just inserted.
				-    $values = array(
				-      'organism_id' => $organism_id,
				-      'uniquename' => $uname,
				-      'type_id' => $cvterm->cvterm_id,
				-    );
				-    $results = chado_select_record('feature', array('feature_id'), $values);
				-    if (count($results) != 1) {
				-      tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to retreive newly inserted feature '%name (%uname)'", array(
				-        '%name' => $name,
				-        '%uname' => $uname,
				-      ));
				-      return 0;
				-    }
				-    $inserted = TRUE;
				-    $feature = $results[0];
				-  }
				-
				-  // Without a feature there is nothing to update or to attach records to.
				-  if (!$feature) {
				-    tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to find feature '%name' ('%uname') while matching on " .
				-      drupal_strtolower($match_type), array('%name' => $name, '%uname' => $uname));
				-    return 0;
				-  }
				-
				-  // If the feature already existed and this is an update then proceed with
				-  // the update.
				-  if (!$inserted and $can_update) {
				-
				-    // When matching on the name we only touch the uniquename, and only if a
				-    // uniquename was provided.
				-    if ($match_on_name and $uname) {
				-
				-      // First check to make sure that by changing the unique name of this
				-      // feature we won't conflict with another existing feature. The
				-      // feature itself already having this uniquename is not a conflict.
				-      $values = array(
				-        'organism_id' => $organism_id,
				-        'uniquename' => $uname,
				-        'type_id' => $cvterm->cvterm_id,
				-      );
				-      $results = chado_select_record('feature', array('feature_id'), $values);
				-      foreach ($results as $existing) {
				-        if ($existing->feature_id != $feature->feature_id) {
				-          tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Cannot update the feature '%name' with a uniquename of '%uname' and type of '%type' as it conflicts with an existing feature with the same uniquename and type.", array(
				-            '%name' => $name,
				-            '%uname' => $uname,
				-            '%type' => $type_name,
				-          ));
				-          return 0;
				-        }
				-      }
				-
				-      // The change to the uniquename doesn't conflict so perform the update.
				-      $match = array(
				-        'name' => $name,
				-        'organism_id' => $organism_id,
				-        'type_id' => $cvterm->cvterm_id,
				-      );
				-      $success = chado_update_record('feature', $match, array('uniquename' => $uname));
				-      if (!$success) {
				-        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to update feature '%name' ('%uname')", array(
				-          '%name' => $name,
				-          '%uname' => $uname,
				-        ));
				-        return 0;
				-      }
				-    }
				-
				-    // When matching on the unique name we only touch the name, and only if
				-    // a name was provided.
				-    if ($match_on_uname and $name) {
				-      $match = array(
				-        'uniquename' => $uname,
				-        'organism_id' => $organism_id,
				-        'type_id' => $cvterm->cvterm_id,
				-      );
				-      $success = chado_update_record('feature', $match, array('name' => $name));
				-      if (!$success) {
				-        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to update feature '%name' ('%uname')", array(
				-          '%name' => $name,
				-          '%uname' => $uname,
				-        ));
				-        return 0;
				-      }
				-    }
				-  }
				-
				-  // Load the residues for this feature. This is done once, for both newly
				-  // inserted and existing features.
				-  tripal_feature_load_fasta_residues($fh, $feature->feature_id, $seq_start, $seq_end);
				-
				-  // Add in the analysis link.
				-  if ($analysis_id) {
				-    // If the association doesn't already exist then add one.
				-    $values = array(
				-      'analysis_id' => $analysis_id,
				-      'feature_id' => $feature->feature_id,
				-    );
				-    $results = chado_select_record('analysisfeature', array('analysisfeature_id'), $values);
				-    if (count($results) == 0) {
				-      $success = chado_insert_record('analysisfeature', $values);
				-      if (!$success) {
				-        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to associate analysis and feature '%name' ('%uname')", array(
				-          '%name' => $name,
				-          '%uname' => $uname,
				-        ));
				-        return 0;
				-      }
				-    }
				-  }
				-
				-  // Now add the database cross reference.
				-  if ($db_id) {
				-    // Check to see if this accession reference exists, if not add it.
				-    $values = array(
				-      'db_id' => $db_id,
				-      'accession' => $accession,
				-    );
				-    $results = chado_select_record('dbxref', array('dbxref_id'), $values);
				-    if (count($results) == 0) {
				-      $success = chado_insert_record('dbxref', $values);
				-      if (!$success) {
				-        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to add database accession '%accession'", array(
				-          '%accession' => $accession,
				-        ));
				-        return 0;
				-      }
				-      $results = chado_select_record('dbxref', array('dbxref_id'), $values);
				-      if (count($results) != 1) {
				-        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to retreive newly inserted dbxref '%name (%uname)'", array(
				-          '%name' => $name,
				-          '%uname' => $uname,
				-        ));
				-        return 0;
				-      }
				-    }
				-    $dbxref = $results[0];
				-
				-    // Check to see if the feature dbxref record exists, if not then add it.
				-    $values = array(
				-      'feature_id' => $feature->feature_id,
				-      'dbxref_id' => $dbxref->dbxref_id,
				-    );
				-    $results = chado_select_record('feature_dbxref', array('feature_dbxref_id'), $values);
				-    if (count($results) == 0) {
				-      $success = chado_insert_record('feature_dbxref', $values);
				-      if (!$success) {
				-        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to add associate database accession '%accession' with feature", array(
				-          '%accession' => $accession,
				-        ));
				-        return 0;
				-      }
				-    }
				-  }
				-
				-  // Now add in the relationship to the parent feature if one was requested.
				-  if ($rel_type) {
				-    $values = array(
				-      'organism_id' => $organism_id,
				-      'uniquename' => $parent,
				-      'type_id' => $parentcvterm->cvterm_id,
				-    );
				-    $results = chado_select_record('feature', array('feature_id'), $values);
				-    if (count($results) != 1) {
				-      tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Cannot find a unique feature for the parent '%parent' of type '%type' for the feature.", array(
				-        '%parent' => $parent,
				-        '%type' => $parent_type,
				-      ));
				-      return 0;
				-    }
				-    $parent_feature = $results[0];
				-
				-    // Check to see if the relationship already exists, if not then add it.
				-    $values = array(
				-      'subject_id' => $feature->feature_id,
				-      'object_id' => $parent_feature->feature_id,
				-      'type_id' => $relcvterm->cvterm_id,
				-    );
				-    $results = chado_select_record('feature_relationship', array('feature_relationship_id'), $values);
				-    if (count($results) == 0) {
				-      $success = chado_insert_record('feature_relationship', $values);
				-      if (!$success) {
				-        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to add the relationship between feature '%name' ('%uname') and its parent '%parent'", array(
				-          '%name' => $name,
				-          '%uname' => $uname,
				-          '%parent' => $parent,
				-        ));
				-        return 0;
				-      }
				-    }
				-  }
				-}
			
 
				-
			
 
				-/**
				- * Adds the residues column to the feature.
				- *
				- * This function seeks to the proper location in the file for the sequence,
				- * reads it line by line and appends it in chunks to the feature.residues
				- * column in the database. When done, the seqlen and md5checksum columns are
				- * recalculated from the stored residues.
				- *
				- * @param $fh
				- *   An open, seekable file handle for the FASTA file.
				- * @param $feature_id
				- *   The feature_id of the feature record to update.
				- * @param $seq_start
				- *   The byte offset in the file where the sequence starts.
				- * @param $seq_end
				- *   The byte offset in the file where the sequence ends.
				- *
				- * @return
				- *   FALSE if appending a chunk of residues fails. Nothing is returned
				- *   otherwise.
				- */
				-function tripal_feature_load_fasta_residues($fh, $feature_id, $seq_start, $seq_end) {
				-
				-  // First position the file at the beginning of the sequence.
				-  fseek($fh, $seq_start, SEEK_SET);
				-
				-  // The number of residues to collect before appending to the database.
				-  $chunk_size = 100000000;
				-  $chunk = '';
				-
				-  // The number of bytes the sequence occupies in the file. Kept at 1 or
				-  // more so that it can safely be used as a divisor.
				-  $seqlen = max(1, $seq_end - $seq_start);
				-
				-  // Report progress every 1% of the sequence, but never more often than
				-  // every 100000 bytes because frequent updates slow things down.
				-  $interval = max(100000, intval($seqlen * 0.01));
				-  $intv_read = 0;
				-  $num_read = 0;
				-
				-  $append_sql = "
				-    UPDATE {feature}
				-    SET residues = residues || :chunk
				-    WHERE feature_id = :feature_id
				-  ";
				-
				-  // First, make sure we don't have a null in the residues, otherwise the
				-  // concatenation used to append the chunks would give null.
				-  $sql = "UPDATE {feature} SET residues = '' WHERE feature_id = :feature_id";
				-  chado_query($sql, array(':feature_id' => $feature_id));
				-
				-  // Read in the lines until we reach the end of the sequence. Checking the
				-  // position before each read keeps an empty sequence from consuming the
				-  // records that follow it, and comparing fgets() against FALSE keeps a
				-  // line such as "0" from ending the loop early.
				-  print "Sequence complete: 0%. Memory: " . number_format(memory_get_usage()) . " bytes. \r";
				-  while (ftell($fh) < $seq_end and ($line = fgets($fh)) !== FALSE) {
				-    // fgets() keeps the line ending, so the line length is the number of
				-    // bytes consumed from the file.
				-    $num_read += strlen($line);
				-    $intv_read += strlen($line);
				-    $chunk .= trim($line);
				-
				-    // If we've read in enough of the sequence then append it to the database.
				-    if (strlen($chunk) >= $chunk_size) {
				-      $success = chado_query($append_sql, array(':feature_id' => $feature_id, ':chunk' => $chunk));
				-      if (!$success) {
				-        return FALSE;
				-      }
				-      $chunk = '';
				-    }
				-
				-    if ($intv_read >= $interval) {
				-      $percent = sprintf("%.2f", min(100, ($num_read / $seqlen) * 100));
				-      print "Sequence complete: " . $percent . "%. Memory: " . number_format(memory_get_usage()) .
				-         " bytes. \r";
				-      $intv_read = 0;
				-    }
				-  }
				-
				-  // Write the last bit of sequence if it remains.
				-  if (strlen($chunk) > 0) {
				-    $success = chado_query($append_sql, array(':feature_id' => $feature_id, ':chunk' => $chunk));
				-    if (!$success) {
				-      return FALSE;
				-    }
				-  }
				-
				-  // Now update the seqlen and md5checksum fields.
				-  $sql = "UPDATE {feature} SET seqlen = char_length(residues),  md5checksum = md5(residues) WHERE feature_id = :feature_id";
				-  chado_query($sql, array(':feature_id' => $feature_id));
				-
				-  $percent = sprintf("%.2f", min(100, ($num_read / $seqlen) * 100));
				-  print "Sequence complete: " . $percent . "%. Memory: " . number_format(memory_get_usage()) .
				-     " bytes. \r";
				-}
			
--- a/tripal_chado/includes/loaders/tripal_chado.gff_loader.inc
+++ b/tripal_chado/includes/loaders/tripal_chado.gff_loader.inc
@@ -1,2319 +0,0 @@
 
				-<?php
			
 
				-/**
			
 
				- * @file
			
 
				- * Provides gff3 loading functionality. Creates features based on their specification
			
 
				- * in a GFF3 file.
			
 
				- */
			
 
				-
			
 
				-/**
			
 
				- * @defgroup gff3_loader GFF3 Feature Loader
			
 
				- * @ingroup tripal_chado
			
 
				- * @{
			
 
				- * Provides gff3 loading functionality. Creates features based on their specification in a GFF3 file.
			
 
				- * @}
			
 
				- */
			
 
				-
			
 
				-/**
				- * The form to submit a GFF3 loading job.
				- *
				- * The organism and analysis select lists are built from the records in the
				- * Chado organism and analysis tables.
				- *
				- * @return
				- *   The form array.
				- *
				- * @ingroup gff3_loader
				- */
				-function tripal_feature_gff3_load_form() {
				-  $form = array();
				-
				-  $form['gff_file'] = array(
				-    '#type' => 'textfield',
				-    '#title' => t('GFF3 File'),
				-    '#description' => t('Please enter the full system path for the GFF file, or a path within the Drupal
				-                           installation (e.g. /sites/default/files/xyz.gff).  The path must be accessible to the
				-                           server on which this Drupal instance is running.'),
				-    '#required' => TRUE,
				-  );
				-
				-  // Get the list of organisms. The empty first option forces the user to
				-  // make a choice.
				-  $sql = "SELECT * FROM {organism} ORDER BY genus, species";
				-  $org_rset = chado_query($sql);
				-  $organisms = array('' => '');
				-  while ($organism = $org_rset->fetchObject()) {
				-    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
				-  }
				-  // Element types are machine names and must not be passed through t().
				-  $form['organism_id'] = array(
				-    '#title' => t('Organism'),
				-    '#type' => 'select',
				-    '#description' => t("Choose the organism to which these sequences are associated"),
				-    '#required' => TRUE,
				-    '#options' => $organisms,
				-  );
				-
				-  // Get the list of analyses.
				-  $sql = "SELECT * FROM {analysis} ORDER BY name";
				-  $analysis_rset = chado_query($sql);
				-  $analyses = array('' => '');
				-  while ($analysis = $analysis_rset->fetchObject()) {
				-    $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
				-  }
				-  $form['analysis_id'] = array(
				-    '#title' => t('Analysis'),
				-    '#type' => 'select',
				-    '#description' => t("Choose the analysis to which these features are associated.
				-       Why specify an analysis for a data load?  All data comes
				-       from some place, even if downloaded from Genbank. By specifying
				-       analysis details for all data imports it allows an end user to reproduce the
				-       data set, but at least indicates the source of the data."),
				-    '#required' => TRUE,
				-    '#options' => $analyses,
				-  );
				-
				-  $form['line_number'] = array(
				-    '#type' => 'textfield',
				-    '#title' => t('Start Line Number'),
				-    '#description' => t('Enter the line number in the GFF file where you would like to begin processing.  The
				-      first line is line number 1.  This option is useful for examining loading problems with large GFF files.'),
				-    '#size' => 10,
				-  );
				-
				-  $form['landmark_type'] = array(
				-    '#title' => t('Landmark Type'),
				-    '#type' => 'textfield',
				-    '#description' => t("Optional. Use this field to specify a Sequence Ontology type
				-       for the landmark sequences in the GFF file (e.g. 'chromosome'). If the GFF file
				-       contains a '##sequence-region' line that describes the landmark sequences to
				-       which all others are aligned and a type is provided here then the features
				-       will be created if they do not already exist.  If they do exist then this
				-       field is not used."),
				-  );
				-
				-  $form['alt_id_attr'] = array(
				-    '#title' => t('ID Attribute'),
				-    '#type' => 'textfield',
				-    '#description' => t("Optional. Sometimes lines in the GFF file are missing the
				-      required ID attribute that specifies the unique name of the feature, but there
				-      may be another attribute that can uniquely identify the feature.  If so,
				-      you may specify the name of the attribute to use for the name."),
				-  );
				-
				-  // Advanced Options
				-  $form['advanced'] = array(
				-    '#type' => 'fieldset',
				-    '#title' => t('Advanced Options'),
				-    '#collapsible' => TRUE,
				-    '#collapsed' => TRUE,
				-  );
				-
				-  $form['advanced']['protein_names'] = array(
				-    '#type' => 'fieldset',
				-    '#title' => t('Protein Names'),
				-    '#collapsible' => TRUE,
				-    '#collapsed' => FALSE,
				-    '#weight' => 5,
				-  );
				-
				-  $form['advanced']['protein_names']['re_help'] = array(
				-    '#type' => 'item',
				-    '#markup' => t('A regular expression is an advanced method for extracting information from a string of text.
				-                   If your GFF3 file does not contain polypeptide (or protein) features, but contains CDS features, proteins will be automatically created.
				-                   By default the loader will give each protein a name based on the name of the corresponding mRNA followed by the "-protein" suffix.
				-                   If you want to customize the name of the created protein, you can use the following regex.'),
				-  );
				-  $form['advanced']['protein_names']['re_mrna'] = array(
				-    '#type' => 'textfield',
				-    '#title' => t('Regular expression for the mRNA name'),
				-    '#required' => FALSE,
				-    '#description' => t('Enter the regular expression that will extract portions of
				-       the mRNA unique name. For example, for a
				-       mRNA with a unique name finishing by -RX (e.g. SPECIES0000001-RA),
				-       the regular expression would be, "^(.*?)-R([A-Z]+)$".'),
				-  );
				-  // The example pattern below must be the same one shown for the mRNA
				-  // regular expression above.
				-  $form['advanced']['protein_names']['re_protein'] = array(
				-    '#type' => 'textfield',
				-    '#title' => t('Replacement string for the protein name'),
				-    '#required' => FALSE,
				-    '#description' => t('Enter the replacement string that will be used to create
				-       the protein name based on the mRNA regular expression. For example, for a
				-       mRNA regular expression "^(.*?)-R([A-Z]+)$", the corresponding protein regular
				-       expression would be "$1-P$2".'),
				-  );
				-
				-  $form['advanced']['import_options'] = array(
				-    '#type' => 'fieldset',
				-    '#title' => t('Import Options'),
				-    '#collapsible' => TRUE,
				-    '#collapsed' => FALSE,
				-    '#weight' => 0,
				-  );
				-
				-  $form['advanced']['import_options']['use_transaction'] = array(
				-    '#type' => 'checkbox',
				-    '#title' => t('Use a transaction'),
				-    '#required' => FALSE,
				-    '#description' => t('Use a database transaction when loading the GFF file.  If an error occurs
				-      the entire dataset loaded prior to the failure will be rolled back and will not be available
				-      in the database.  If this option is unchecked and failure occurs all records up to the point
				-      of failure will be present in the database.'),
				-    '#default_value' => 1,
				-  );
				-  $form['advanced']['import_options']['add_only'] = array(
				-    '#type' => 'checkbox',
				-    '#title' => t('Import only new features'),
				-    '#required' => FALSE,
				-    '#description' => t('The job will skip features in the GFF file that already
				-                         exist in the database and import only new features.'),
				-  );
				-  $form['advanced']['import_options']['update'] = array(
				-    '#type' => 'checkbox',
				-    '#title' => t('Import all and update'),
				-    '#required' => FALSE,
				-    '#description' => t('Existing features will be updated and new features will be added.  Attributes
				-                         for a feature that are not present in the GFF but which are present in the
				-                         database will not be altered.'),
				-    '#default_value' => 1,
				-  );
				-
				-  // SPF: there are bugs in refreshing and removing features.  The bugs arise
				-  //      if a feature in the GFF does not have a uniquename. GenSAS will auto
				-  //      generate this uniquename and it will not be the same as a previous
				-  //      load because it uses the date.  This causes orphaned CDS/exons, UTRs
				-  //      to be left behind during a delete or refresh.  So, the short term
				-  //      fix is to leave out the 'refresh' (Import all and replace) and
				-  //      'remove' (Delete features) checkboxes.
				-
				-  $form['advanced']['import_options']['create_organism'] = array(
				-    '#type' => 'checkbox',
				-    '#title' => t('Create organism'),
				-    '#required' => FALSE,
				-    '#description' => t('The Tripal GFF loader supports the "organism" attribute. This allows features of a
				-       different organism to be aligned to the landmark sequence of another species.  The format of the
				-       attribute is "organism=[genus]:[species]", where [genus] is the organism\'s genus and [species] is the
				-       species name. Check this box to automatically add the organism to the database if it does not already exists.
				-       Otherwise lines with an organism attribute where the organism is not present in the database will be skipped.'),
				-  );
				-
				-  $form['advanced']['targets'] = array(
				-    '#type' => 'fieldset',
				-    '#title' => t('Targets'),
				-    '#collapsible' => TRUE,
				-    '#collapsed' => FALSE,
				-    '#weight' => 1,
				-  );
				-  $form['advanced']['targets']['adesc'] = array(
				-    '#markup' => t("When alignments are represented in the GFF file (e.g. such as
				-       alignments of cDNA sequences to a whole genome, or blast matches), they are
				-       represented using two feature types: 'match' (or cDNA_match, EST_match, etc.)
				-       and 'match_part'.  These features may also have a 'Target' attribute to
				-       specify the sequence that is being aligned.
				-       However, the organism to which the aligned sequence belongs may not be present in the
				-       GFF file.  Here you can specify the organism and feature type of the target sequences.
				-       The options here will apply to all targets unless the organism and type are explicitly
				-       set in the GFF file using the 'target_organism' and 'target_type' attributes."),
				-  );
				-  $form['advanced']['targets']['target_organism_id'] = array(
				-    '#title' => t('Target Organism'),
				-    '#type' => 'select',
				-    '#description' => t("Optional. Choose the organism to which target sequences belong.
				-      Select this only if target sequences belong to a different organism than the
				-      one specified above. And only choose an organism here if all of the target sequences
				-      belong to the same species.  If the targets in the GFF file belong to multiple
				-      different species then the organism must be specified using the 'target_organism=genus:species'
				-      attribute in the GFF file."),
				-    '#options' => $organisms,
				-  );
				-  $form['advanced']['targets']['target_type'] = array(
				-    '#title' => t('Target Type'),
				-    '#type' => 'textfield',
				-    '#description' => t("Optional. If the unique name for a target sequence is not unique (e.g. a protein
				-       and an mRNA have the same name) then you must specify the type for all targets in the GFF file. If
				-       the targets are of different types then the type must be specified using the 'target_type=type' attribute
				-       in the GFF file. This must be a valid Sequence Ontology (SO) term."),
				-  );
				-  $form['advanced']['targets']['create_target'] = array(
				-    '#type' => 'checkbox',
				-    '#title' => t('Create Target'),
				-    '#required' => FALSE,
				-    '#description' => t("If the target feature cannot be found, create one using the organism and type specified above, or
				-       using the 'target_organism' and 'target_type' fields specified in the GFF file.  Values specified in the
				-       GFF file take precedence over those specified above."),
				-  );
				-
				-  $form['button'] = array(
				-    '#type' => 'submit',
				-    '#value' => t('Import GFF3 file'),
				-    '#weight' => 10,
				-  );
				-
				-  return $form;
				-}
			
 
				-
			
 
				/**
				 * Validate the GFF3 loading job form.
				 *
				 * Sets a form error when:
				 *  - the GFF file cannot be found either relative to the Drupal root or at
				 *    the path exactly as entered,
				 *  - more than one import mode is selected,
				 *  - the starting line number is provided but is not numeric or is negative,
				 *  - only one of the mRNA regular expression / protein replacement string
				 *    is provided,
				 *  - the mRNA regular expression does not compile.
				 *
				 * @param $form
				 *   The form array. Not used by this function.
				 * @param $form_state
				 *   The form state. Submitted values are read from $form_state['values'].
				 *
				 * @ingroup gff3_loader
				 */
				function tripal_feature_gff3_load_form_validate($form, &$form_state) {

				  $gff_file    = trim($form_state['values']['gff_file']);
				  $add_only    = $form_state['values']['add_only'];
				  $update      = $form_state['values']['update'];
				  // The refresh and remove modes are hard-coded off here; the lines that
				  // would read them from the form are commented out.
				  $refresh     = 0; //$form_state['values']['refresh'];
				  $remove      = 0; //$form_state['values']['remove'];
				  $line_number = trim($form_state['values']['line_number']);
				  $re_mrna     = trim($form_state['values']['re_mrna']);
				  $re_protein  = trim($form_state['values']['re_protein']);

				  // Check to see if the file is located local to Drupal.
				  $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $gff_file;
				  if (!file_exists($dfile)) {
				    // If not local to Drupal, the file must be someplace else, just use
				    // the full path provided.
				    $dfile = $gff_file;
				  }
				  if (!file_exists($dfile)) {
				    form_set_error('gff_file', t("Cannot find the file on the system. Check that the file exists or that the web server has permissions to read the file."));
				  }

				  // The import modes are mutually exclusive: at most one may be selected.
				  $num_selected = count(array_filter(array($add_only, $update, $refresh, $remove)));
				  if ($num_selected > 1) {
				    form_set_error('add_only', t("Please select only one checkbox from the import options section"));
				  }

				  // Only inspect the line number when one was entered. Comparing an empty
				  // string with 0 using '<' evaluates to TRUE under PHP 8 comparison rules,
				  // which would wrongly reject a blank value.
				  if ($line_number !== '' and (!is_numeric($line_number) or $line_number < 0)) {
				    form_set_error('line_number', t("Please provide an integer line number greater than zero."));
				  }

				  // The regular expression and the replacement string only make sense
				  // together: flag the case where exactly one of them is provided.
				  // NOTE(review): 're_uname' is not one of the values read in this function;
				  // confirm that a form element with this name exists, otherwise the message
				  // is shown without highlighting a field.
				  if (($re_mrna or $re_protein) and !($re_mrna and $re_protein)) {
				    form_set_error('re_uname', t("You must provide both a regular expression for mRNA and a replacement string for protein"));
				  }

				  // Check the regular expression to make sure it is valid. Warnings raised by
				  // PCRE for a malformed pattern are swallowed by a temporary no-op handler
				  // and the return values are inspected instead. An empty string (not NULL)
				  // is used as the subject because passing NULL is deprecated in PHP 8.1.
				  set_error_handler(function() {}, E_WARNING);
				  $result_re = preg_match("/" . $re_mrna . "/", '');
				  $result = preg_replace("/" . $re_mrna . "/", $re_protein, '');
				  restore_error_handler();
				  if ($result_re === FALSE) {
				    form_set_error('re_mrna', 'Invalid regular expression.');
				  }
				  // preg_replace() signals failure with NULL, not FALSE.
				  elseif ($result === NULL) {
				    form_set_error('re_protein', 'Invalid replacement string.');
				  }
				}
			
 
				-
			
 
				/**
				 * Submit handler for the GFF3 loading form.
				 *
				 * Gathers the submitted values, packs them into the positional argument list
				 * passed to tripal_feature_load_gff3() and queues a Tripal job that will run
				 * that function.
				 *
				 * @param $form
				 *   The form array. Not used by this function.
				 * @param $form_state
				 *   The form state. Submitted values are read from $form_state['values'].
				 *
				 * @return
				 *   Always an empty string.
				 *
				 * @ingroup gff3_loader
				 */
				function tripal_feature_gff3_load_form_submit($form, &$form_state) {
				  global $user;

				  $values = $form_state['values'];

				  $gff_file = trim($values['gff_file']);
				  $add_only = $values['add_only'];
				  $update   = $values['update'];
				  // Refresh and remove are fixed to 0; the form values are not consulted.
				  $refresh  = 0; //$form_state['values']['refresh'];
				  $remove   = 0; //$form_state['values']['remove'];

				  // Positional arguments for the job callback; the order must match the
				  // parameter order of tripal_feature_load_gff3().
				  $args = array(
				    $gff_file,
				    $values['organism_id'],
				    $values['analysis_id'],
				    $add_only,
				    $update,
				    $refresh,
				    $remove,
				    $values['use_transaction'],
				    $values['target_organism_id'],
				    trim($values['target_type']),
				    $values['create_target'],
				    trim($values['line_number']),
				    trim($values['landmark_type']),
				    trim($values['alt_id_attr']),
				    $values['create_organism'],
				    trim($values['re_mrna']),
				    trim($values['re_protein']),
				  );

				  // Describe the import mode for the job name. When several flags are set
				  // the last matching entry wins.
				  $mode_labels = array(
				    'import only new features' => $add_only,
				    'import all and update' => $update,
				    'import all and replace' => $refresh,
				    'delete features' => $remove,
				  );
				  $type = '';
				  foreach ($mode_labels as $label => $is_selected) {
				    if ($is_selected) {
				      $type = $label;
				    }
				  }

				  // Keep only what follows the last slash of the path for display.
				  $fname = preg_replace("/.*\/(.*)/", "$1", $gff_file);

				  // The job needs the loader include file available when it is executed.
				  $includes = array();
				  $includes[] = module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.gff_loader');

				  tripal_add_job("$type GFF3 file: $fname", 'tripal_chado',
				    'tripal_feature_load_gff3', $args, $user->uid, 10, $includes);

				  return '';
				}
			
 
				-
			
 
				-/**
			
 
				- * Actually load a GFF3 file. This is the function called by tripal jobs
			
 
				- *
			
 
				- * @param $gff_file
			
 
				- *   The full path to the GFF file on the filesystem
			
 
				- * @param $organism_id
			
 
				- *   The organism_id of the organism to which the features in the GFF belong
			
 
				- * @param $analysis_id
			
 
				- *   The analysis_id of the analysis from which the features in the GFF were generated
			
 
				- * @param $add_only
			
 
				- *   Set to 1 if feature should be added only.  In the case where a feature
			
 
				- *   already exists, it will not be updated.  Default is 0
			
 
				- * @param $update
			
 
				- *   Set to 1 to update existing features. New features will be added. Attributes
			
 
				- *   for a feature that are not present in the GFF but which are present in the
			
 
				- *   database will not be altered. Default is 1
			
 
				- * @param $refresh
			
 
				- *   Set to 1 to update existing features. New features will be added. Attributes
			
 
				- *   for a feature that are not present in the GFF but which are present in the
			
 
				- *   database will be removed. Default is 0
			
 
				- * @param $remove
			
 
				- *   Set to 1 to remove features present in the GFF file that exist in the database.
			
 
				- *   Default is 0.
			
 
				- * @param $use_transaction
			
 
				- *   Set to 1 to use a transaction when loading the GFF. Any failure during
			
 
				- *   loading will result in the rollback of any changes. Default is 1.
			
 
				- * @param $target_organism_id
			
 
				- *   If the GFF file contains a 'Target' attribute then the feature and the
			
 
				- *   target will have an alignment created, but to find the proper target
			
 
				- *   feature the target organism must also be known.  If different from the
			
 
				- *   organism specified for the GFF file, then use  this argument to specify
			
 
				- *   the target organism.  Only use this argument if all target sequences belong
			
 
				- *   to the same species. If the targets in the GFF file belong to multiple
			
 
				- *   different species then the organism must be specified using the
			
 
				- *   'target_organism=genus:species' attribute in the GFF file. Default is NULL.
			
 
				- * @param $target_type
			
 
				- *   If the GFF file contains a 'Target' attribute then the feature and the
			
 
				- *   target will have an alignment created, but to find the proper target
			
 
				- *   feature the target organism must also be known.  This can be used to
			
 
				- *   specify the target feature type to help with identification of the target
			
 
				- *   feature.  Only use this argument if all target sequences types are the same.
			
 
				- *   If the targets are of different types then the type must be specified using
			
 
				- *   the 'target_type=type' attribute in the GFF file. This must be a valid
			
 
				- *   Sequence Ontology (SO) term. Default is NULL
			
 
				- * @param $create_target
			
 
				- *   Set to 1 to create the target feature if it cannot be found in the
			
 
				- *   database. Default is 0
			
 
				- * @param $start_line
			
 
				- *   Set this to the line in the GFF file where importing should start. This
			
 
				- *   is useful for testing and debugging GFF files that may have problems and
			
 
				- *   you want to start at a particular line to speed testing.  Default = 1
			
 
				- * @param $landmark_type
			
 
				- *   Use this argument to specify a Sequence Ontology term name for the landmark
			
 
				- *   sequences in the GFF file (e.g. 'chromosome'), if the GFF file contains a
			
 
				- *   '##sequence-region' line that describes the landmark sequences. Default = ''
			
 
				- * @param $alt_id_attr
			
 
				- *   Sometimes lines in the GFF file are missing the required ID attribute that
			
 
				- *   specifies the unique name of the feature. If so, you may specify the
			
 
				- *   name of an existing attribute to use for the ID.
			
 
				- * @param $create_organism
			
 
				- *   The Tripal GFF loader supports the "organism" attribute. This allows
			
 
				- *   features of a different organism to be aligned to the landmark sequence of
			
 
				- *   another species. The format of the attribute is "organism=[genus]:[species]",
			
 
				- *   where [genus] is the organism's genus and [species] is the species name.
			
 
				- *   Check this box to automatically add the organism to the database if it does
			
 
				- *   not already exist. Otherwise lines with an organism attribute where the
			
 
				- *   organism is not present in the database will be skipped.
			
 
				- * @param $re_mrna A
			
 
				- *          regular expression to extract portions from mRNA id
			
 
				- * @param $re_protein A
			
 
				- *          replacement string to generate the protein id
			
 
				- * @param $job
			
 
				- *  The tripal job_id.  Only used by the Tripal Jobs subsystem.
			
 
				- *
			
 
				- * @ingroup gff3_loader
			
 
				- */
			
 
				-function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
			
 
				-  $add_only = 0, $update = 1, $refresh = 0, $remove = 0, $use_transaction = 1,
			
 
				-  $target_organism_id = NULL, $target_type = NULL,  $create_target = 0,
			
 
				-  $start_line = 1, $landmark_type = '', $alt_id_attr = '',  $create_organism = FALSE,
			
 
				-  $re_mrna = '', $re_protein = '', $job = NULL) {
			
 
				-
			
 
				-  $ret = array();
			
 
				-  $date = getdate();
			
 
				-
			
 
				-  // An array that stores CVterms that have been looked up so we don't have
			
 
				-  // to do the database query every time.
			
 
				-  $cvterm_lookup = array();
			
 
				-
			
 
				-  // An array that stores Landmarks that have been looked up so we don't have
			
 
				-  // to do the database query every time.
			
 
				-  $landmark_lookup = array();
			
 
				-
			
 
				-  // empty the temp tables
			
 
				-  $sql = "DELETE FROM {tripal_gff_temp}";
			
 
				-  chado_query($sql);
			
 
				-  $sql = "DELETE FROM {tripal_gffcds_temp}";
			
 
				-  chado_query($sql);
			
 
				-  $sql = "DELETE FROM {tripal_gffprotein_temp}";
			
 
				-  chado_query($sql);
			
 
				-
			
 
				-  // begin the transaction
			
 
				-  $transaction = null;
			
 
				-  if ($use_transaction) {
			
 
				-    $transaction = db_transaction();
			
 
				-    print "\nNOTE: Loading of this GFF file is performed using a database transaction. \n" .
			
 
				-         "If the load fails or is terminated prematurely then the entire set of \n" .
			
 
				-         "insertions/updates is rolled back and will not be found in the database\n\n";
			
 
				-  }
			
 
				-  try {
			
 
				-
			
 
				-    // check to see if the file is located local to Drupal
			
 
				-    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $gff_file;
			
 
				-    if (!file_exists($dfile)) {
			
 
				-      // if not local to Drupal, the file must be someplace else, just use
			
 
				-      // the full path provided
			
 
				-      $dfile = $gff_file;
			
 
				-    }
			
 
				-    if (!file_exists($dfile)) {
			
 
				-      tripal_report_error('tripal_chado', TRIPAL_ERROR, "Cannot find the file: %dfile",
			
 
				-        array('%dfile' => $dfile));
			
 
				-      return 0;
			
 
				-    }
			
 
				-
			
 
				-    print "Opening $gff_file\n";
			
 
				-
			
 
				-    //$lines = file($dfile,FILE_SKIP_EMPTY_LINES);
			
 
				-    $fh = fopen($dfile, 'r');
			
 
				-    if (!$fh) {
			
 
				-      tripal_report_error('tripal_chado', TRIPAL_ERROR, "cannot open file: %dfile",
			
 
				-        array('%dfile' => $dfile));
			
 
				-      return 0;
			
 
				-    }
			
 
				-    $filesize = filesize($dfile);
			
 
				-
			
 
				-    // get the controlled vocaubulary that we'll be using.  The
			
 
				-    // default is the 'sequence' ontology
			
 
				-    $sql = "SELECT * FROM {cv} WHERE name = :cvname";
			
 
				-    $cv = chado_query($sql, array(':cvname' => 'sequence'))->fetchObject();
			
 
				-    if (!$cv) {
			
 
				-      tripal_report_error('tripal_chado', TRIPAL_ERROR,
			
 
				-        "Cannot find the 'sequence' ontology", array());
			
 
				-      return '';
			
 
				-    }
			
 
				-    // get the organism for which this GFF3 file belongs
			
 
				-    $sql = "SELECT * FROM {organism} WHERE organism_id = :organism_id";
			
 
				-    $organism = chado_query($sql, array(':organism_id' => $organism_id))->fetchObject();
			
 
				-
			
 
				-    $interval = intval($filesize * 0.0001);
			
 
				-    if ($interval == 0) {
			
 
				-      $interval = 1;
			
 
				-    }
			
 
				-    $in_fasta = 0;
			
 
				-    $line_num = 0;
			
 
				-    $num_read = 0;
			
 
				-    $intv_read = 0;
			
 
				-
			
 
				-    // prepare the statement used to get the cvterm for each feature.
			
 
				-    $sel_cvterm_sql = "
			
 
				-      SELECT CVT.cvterm_id, CVT.cv_id, CVT.name, CVT.definition,
			
 
				-        CVT.dbxref_id, CVT.is_obsolete, CVT.is_relationshiptype
			
 
				-      FROM {cvterm} CVT
			
 
				-        INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
			
 
				-        LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
			
 
				-      WHERE CV.cv_id = :cv_id and
			
 
				-       (lower(CVT.name) = lower(:name) or lower(CVTS.synonym) = lower(:synonym))
			
 
				-     ";
			
 
				-
			
 
				-    // If a landmark type was provided then pre-retrieve that.
			
 
				-    if ($landmark_type) {
			
 
				-      $query = array(
			
 
				-        ':cv_id' => $cv->cv_id,
			
 
				-        ':name' => $landmark_type,
			
 
				-        ':synonym' => $landmark_type
			
 
				-      );
			
 
				-      $result = chado_query($sel_cvterm_sql, $query);
			
 
				-      $landmark_cvterm = $result->fetchObject();
			
 
				-      if (!$landmark_cvterm) {
			
 
				-        tripal_report_error('tripal_chado', TRIPAL_ERROR,
			
 
				-          'cannot find landmark feature type \'%landmark_type\'.',
			
 
				-          array('%landmark_type' => $landmark_type));
			
 
				-        return '';
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    // iterate through each line of the GFF file
			
 
				-    print "Parsing Line $line_num (0.00%). Memory: " . number_format(memory_get_usage()) . " bytes\r";
			
 
				-    while ($line = fgets($fh)) {
			
 
				-      $line_num++;
			
 
				-      $size = drupal_strlen($line);
			
 
				-      $num_read += $size;
			
 
				-      $intv_read += $size;
			
 
				-
			
 
				-      if ($line_num < $start_line) {
			
 
				-        continue;
			
 
				-      }
			
 
				-
			
 
				-      // update the job status every 1% features
			
 
				-      if ($job and $intv_read >= $interval) {
			
 
				-        $intv_read = 0;
			
 
				-        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
			
 
				-        print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
			
 
				-        tripal_set_job_progress($job, intval(($num_read / $filesize) * 100));
			
 
				-      }
			
 
				-
			
 
				-      // check to see if we have FASTA section, if so then set the variable
			
 
				-      // to start parsing
			
 
				-      if (preg_match('/^##FASTA/i', $line)) {
			
 
				-        print "Parsing FASTA portion...\n";
			
 
				-        if ($remove) {
			
 
				-          // we're done because this is a delete operation so break out of the loop.
			
 
				-          break;
			
 
				-        }
			
 
				-        tripal_feature_load_gff3_fasta($fh, $interval, $num_read, $intv_read, $line_num, $filesize, $job);
			
 
				-        continue;
			
 
				-      }
			
 
				-      // if the ##sequence-region line is present then we want to add a new feature
			
 
				-      if (preg_match('/^##sequence-region (.*?) (\d+) (\d+)$/i', $line, $region_matches)) {
			
 
				-        $rid = $region_matches[1];
			
 
				-        $rstart = $region_matches[2];
			
 
				-        $rend = $region_matches[3];
			
 
				-        if ($landmark_type) {
			
 
				-          tripal_feature_load_gff3_feature($organism, $analysis_id, $landmark_cvterm, $rid,
			
 
				-            $rid, '', 'f', 'f', 1, 0);
			
 
				-        }
			
 
				-        continue;
			
 
				-      }
			
 
				-
			
 
				-      // skip comments
			
 
				-      if (preg_match('/^#/', $line)) {
			
 
				-        continue;
			
 
				-      }
			
 
				-
			
 
				-      // skip empty lines
			
 
				-      if (preg_match('/^\s*$/', $line)) {
			
 
				-        continue;
			
 
				-      }
			
 
				-
			
 
				-      // get the columns
			
 
				-      $cols = explode("\t", $line);
			
 
				-      if (sizeof($cols) != 9) {
			
 
				-        tripal_report_error('tripal_chado', TRIPAL_ERROR, 'improper number of columns on line %line_num',
			
 
				-          array('%line_num' => $line_num));
			
 
				-        return '';
			
 
				-      }
			
 
				-
			
 
				-      // get the column values
			
 
				-      $landmark = $cols[0];
			
 
				-      $source   = $cols[1];
			
 
				-      $type     = $cols[2];
			
 
				-      $start    = $cols[3];
			
 
				-      $end      = $cols[4];
			
 
				-      $score    = $cols[5];
			
 
				-      $strand   = $cols[6];
			
 
				-      $phase    = $cols[7];
			
 
				-      $attrs    = explode(";", $cols[8]);  // split by a semicolon
			
 
				-
			
 
				-      // ready the start and stop for chado.  Chado expects these positions
			
 
				-      // to be zero-based, so we substract 1 from the fmin
			
 
				-      $fmin = $start - 1;
			
 
				-      $fmax = $end;
			
 
				-      if ($end < $start) {
			
 
				-        $fmin = $end - 1;
			
 
				-        $fmax = $start;
			
 
				-      }
			
 
				-
			
 
				-      // format the strand for chado
			
 
				-      if (strcmp($strand, '.') == 0) {
			
 
				-        $strand = 0;
			
 
				-      }
			
 
				-      elseif (strcmp($strand, '+') == 0) {
			
 
				-        $strand = 1;
			
 
				-      }
			
 
				-      elseif (strcmp($strand, '-') == 0) {
			
 
				-        $strand = -1;
			
 
				-      }
			
 
				-      if (strcmp($phase, '.') == 0) {
			
 
				-        $phase = '';
			
 
				-      }
			
 
				-      if (array_key_exists($type, $cvterm_lookup)) {
			
 
				-        $cvterm = $cvterm_lookup[$type];
			
 
				-      }
			
 
				-      else {
			
 
				-        $result = chado_query($sel_cvterm_sql, array(':cv_id' => $cv->cv_id, ':name' => $type, ':synonym' => $type));
			
 
				-        $cvterm = $result->fetchObject();
			
 
				-        $cvterm_lookup[$type] = $cvterm;
			
 
				-        if (!$cvterm) {
			
 
				-          tripal_report_error('tripal_chado', TRIPAL_ERROR, 'cannot find feature term \'%type\' on line %line_num of the GFF file',
			
 
				-            array('%type' => $type, '%line_num' => $line_num));
			
 
				-          return '';
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      // break apart each of the attributes
			
 
				-      $tags = array();
			
 
				-      $attr_name = '';
			
 
				-      $attr_uniquename = '';
			
 
				-      $attr_residue_info = '';
			
 
				-      $attr_locgroup = 0;
			
 
				-      $attr_fmin_partial = 'f';
			
 
				-      $attr_fmax_partial = 'f';
			
 
				-      $attr_is_obsolete = 'f';
			
 
				-      $attr_is_analysis = 'f';
			
 
				-      $attr_others = [];
			
 
				-      $residues = '';
			
 
				-
			
 
				-      // the organism to which a feature belongs can be set in the GFF
			
 
				-      // file using the 'organism' attribute.  By default we
			
 
				-      // set the $feature_organism variable to the default organism for the landmark
			
 
				-      $attr_organism = '';
			
 
				-      $feature_organism = $organism;
			
 
				-
			
 
				-      foreach ($attrs as $attr) {
			
 
				-        $attr = rtrim($attr);
			
 
				-        $attr = ltrim($attr);
			
 
				-        if (strcmp($attr, '')==0) {
			
 
				-          continue;
			
 
				-        }
			
 
				-        if (!preg_match('/^[^\=]+\=.+$/', $attr)) {
			
 
				-          tripal_report_error('tripal_chado', TRIPAL_ERROR, 'Attribute is not correctly formatted on line %line_num: %attr',
			
 
				-            array('%line_num' => $line_num, '%attr' => $attr));
			
 
				-          return '';
			
 
				-        }
			
 
				-
			
 
				-        // break apart each tag
			
 
				-        $tag = preg_split("/=/", $attr, 2);  // split by equals sign
			
 
				-
			
 
				-        // multiple instances of an attribute are separated by commas
			
 
				-        $tag_name = $tag[0];
			
 
				-        if (!array_key_exists($tag_name, $tags)) {
			
 
				-          $tags[$tag_name] = array();
			
 
				-        }
			
 
				-        $tags[$tag_name] = array_merge($tags[$tag_name], explode(",", $tag[1]));  // split by comma
			
 
				-
			
 
				-
			
 
				-        // replace the URL escape codes for each tag
			
 
				-        for ($i = 0; $i < count($tags[$tag_name]); $i++) {
			
 
				-          $tags[$tag_name][$i] = urldecode($tags[$tag_name][$i]);
			
 
				-        }
			
 
				-
			
 
				-        // get the name and ID tags
			
 
				-        $skip_feature = 0;  // if there is a problem with any of the attributes this variable gets set
			
 
				-        if (strcmp($tag_name, 'ID') == 0) {
			
 
				-          $attr_uniquename =  urldecode($tag[1]);
			
 
				-        }
			
 
				-        elseif (strcmp($tag_name, 'Name') == 0) {
			
 
				-          $attr_name =  urldecode($tag[1]);
			
 
				-        }
			
 
				-        elseif (strcmp($tag_name, 'organism') == 0) {
			
 
				-          $attr_organism = urldecode($tag[1]);
			
 
				-          $org_matches = array();
			
 
				-          if (preg_match('/^(.*?):(.*?)$/', $attr_organism, $org_matches)) {
			
 
				-            $values = array(
			
 
				-              'genus' => $org_matches[1],
			
 
				-              'species' => $org_matches[2],
			
 
				-            );
			
 
				-            $org = chado_select_record('organism', array("*"), $values);
			
 
				-            if (count($org) == 0) {
			
 
				-              if ($create_organism) {
			
 
				-                $feature_organism = (object) chado_insert_record('organism', $values);
			
 
				-                if (!$feature_organism) {
			
 
				-                  tripal_report_error('tripal_chado', TRIPAL_ERROR, "Could not add the organism, '%org', from line %line. Skipping this line. ",
			
 
				-                    array('%org' => $attr_organism, '%line' => $line_num));
			
 
				-                  $skip_feature = 1;
			
 
				-                }
			
 
				-              }
			
 
				-              else {
			
 
				-                tripal_report_error('tripal_chado', TRIPAL_ERROR, "The organism attribute '%org' on line %line does not exist. Skipping this line. ",
			
 
				-                  array('%org' => $attr_organism, '%line' => $line_num));
			
 
				-                $skip_feature = 1;
			
 
				-              }
			
 
				-            }
			
 
				-            else {
			
 
				-              // We found the organism in the database so use it.
			
 
				-              $feature_organism = $org[0];
			
 
				-            }
			
 
				-          }
			
 
				-          else {
			
 
				-            tripal_report_error('tripal_chado', TRIPAL_ERROR, "The organism attribute '%org' on line %line is not properly formated. It " .
			
 
				-              "should be of the form: organism=Genus:species.  Skipping this line.",
			
 
				-              array('%org' => $attr_organism, '%line' => $line_num));
			
 
				-            $skip_feature = 1;
			
 
				-          }
			
 
				-        }
			
 
				-        // Get the list of non-reserved attributes.
			
 
				-        elseif (strcmp($tag_name, 'Alias') != 0        and strcmp($tag_name, 'Parent') != 0 and
			
 
				-                strcmp($tag_name, 'Target') != 0       and strcmp($tag_name, 'Gap') != 0 and
			
 
				-                strcmp($tag_name, 'Derives_from') != 0 and strcmp($tag_name, 'Note') != 0 and
			
 
				-                strcmp($tag_name, 'Dbxref') != 0       and strcmp($tag_name, 'Ontology_term') != 0 and
			
 
				-                strcmp($tag_name, 'Is_circular') != 0  and strcmp($tag_name, 'target_organism') != 0 and
			
 
				-                strcmp($tag_name, 'target_type') != 0  and strcmp($tag_name, 'organism' != 0)) {
			
 
				-          foreach ($tags[$tag_name] as $value) {
			
 
				-            $attr_others[$tag_name][] = $value;
			
 
				-          }
			
 
				-        }
			
 
				-      }
			
 
				-      // If neither name nor uniquename are provided then generate one.
			
 
				-      if (!$attr_uniquename and !$attr_name) {
			
 
				-        // Check if an alternate ID field is suggested, if so, then use
			
 
				-        // that for the name.
			
 
				-        if (array_key_exists($alt_id_attr, $tags)) {
			
 
				-          $attr_uniquename = $tags[$alt_id_attr][0];
			
 
				-          $attr_name = $attr_uniquename;
			
 
				-        }
			
 
				-        // If the row has a parent then generate a uniquename using the parent name
			
 
				-        // add the date to the name in the event there are more than one child with
			
 
				-        // the same parent.
			
 
				-        elseif (array_key_exists('Parent', $tags)) {
			
 
				-          $attr_uniquename = $tags['Parent'][0] . "-$type-$landmark-" . $date[0] . ":" . ($fmin + 1) . ".." . $fmax;
			
 
				-          $attr_name = $attr_uniquename;
			
 
				-        }
			
 
				-        // Generate a unique name based on the date, type and location
			
 
				-        // and set the name to simply be the type.
			
 
				-        else {
			
 
				-          $attr_uniquename = $date[0] . "-$type-$landmark:" . ($fmin + 1) . ".." . $fmax;
			
 
				-          $attr_name = $type;
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      // If a name is not specified then use the unique name as the name
			
 
				-      if (strcmp($attr_name, '') == 0) {
			
 
				-        $attr_name = $attr_uniquename;
			
 
				-      }
			
 
				-
			
 
				-      // If an ID attribute is not specified then we must generate a
			
 
				-      // unique ID. Do this by combining the attribute name with the date
			
 
				-      // and line number.
			
 
				-      if (!$attr_uniquename) {
			
 
				-        $attr_uniquename = $attr_name . '-' . $date[0] . '-' . $line_num;
			
 
				-      }
			
 
				-
			
 
				-      // Make sure the landmark sequence exists in the database.  If the user
			
 
				-      // has not specified a landmark type (and it's not required in the GFF
			
 
				-      // format) then we don't know the type of the landmark so we'll hope
			
 
				-      // that it's unique across all types for the organism. Only do this
			
 
				-      // test if the landmark and the feature are different.
			
 
				-      if (!$remove and !(strcmp($landmark, $attr_uniquename) == 0 or strcmp($landmark, $attr_name) == 0) and !in_array($landmark, $landmark_lookup)) {
			
 
				-
			
 
				-        $select = array(
			
 
				-          'organism_id' => $organism->organism_id,
			
 
				-          'uniquename'  => $landmark,
			
 
				-        );
			
 
				-        $columns = array('count(*) as num_landmarks');
			
 
				-        if ($landmark_type) {
			
 
				-          $select['type_id'] = array(
			
 
				-            'name' => $landmark_type,
			
 
				-          );
			
 
				-        }
			
 
				-        $count = chado_select_record('feature', $columns, $select);
			
 
				-        if (!$count or count($count) == 0 or $count[0]->num_landmarks == 0) {
			
 
				-          // now look for the landmark using the name rather than uniquename.
			
 
				-          $select = array(
			
 
				-            'organism_id' => $organism->organism_id,
			
 
				-            'name'  => $landmark,
			
 
				-          );
			
 
				-          $columns = array('count(*) as num_landmarks');
			
 
				-          if ($landmark_type) {
			
 
				-            $select['type_id'] = array(
			
 
				-              'name' => $landmark_type,
			
 
				-            );
			
 
				-          }
			
 
				-          $count = chado_select_record('feature', $columns, $select);
			
 
				-          if (!$count or count($count) == 0 or $count[0]->num_landmarks == 0) {
			
 
				-            tripal_report_error('tripal_chado', TRIPAL_ERROR, "The landmark '%landmark' cannot be found for this organism (%species) " .
			
 
				-                  "Please add the landmark and then retry the import of this GFF3 " .
			
 
				-                  "file", array('%landmark' => $landmark, '%species' => $organism->genus . " " . $organism->species));
			
 
				-            return '';
			
 
				-          }
			
 
				-          elseif ($count[0]->num_landmarks > 1) {
			
 
				-            tripal_report_error('tripal_chado', TRIPAL_ERROR, "The landmark '%landmark' has more than one entry for this organism (%species) " .
			
 
				-                  "Cannot continue", array('%landmark' => $landmark, '%species' => $organism->genus . " " . $organism->species));
			
 
				-            return '';
			
 
				-          }
			
 
				-
			
 
				-        }
			
 
				-        if ($count[0]->num_landmarks > 1) {
			
 
				-          tripal_report_error('tripal_chado', TRIPAL_ERROR, "The landmark '%landmark' is not unique for this organism. " .
			
 
				-                "The features cannot be associated", array('%landmark' => $landmark));
			
 
				-          return '';
			
 
				-        }
			
 
				-
			
 
				-        // The landmark was found, remember it
			
 
				-        $landmark_lookup[] = $landmark;
			
 
				-      }
			
 
				-/*
			
 
				-      // If the option is to remove or refresh then we want to remove
			
 
				-      // the feature from the database.
			
 
				-      if ($remove or $refresh) {
			
 
				-        // Next remove the feature itself.
			
 
				-        $sql = "DELETE FROM {feature}
			
 
				-                WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
			
 
				-        $match = array(
			
 
				-          'organism_id' => $feature_organism->organism_id,
			
 
				-          'uniquename'  => $attr_uniquename,
			
 
				-          'type_id'     => $cvterm->cvterm_id
			
 
				-        );
			
 
				-        $result = chado_delete_record('feature', $match);
			
 
				-        if (!$result) {
			
 
				-          tripal_report_error('tripal_chado', TRIPAL_ERROR, "cannot delete feature %attr_uniquename",
			
 
				-            array('%attr_uniquename' => $attr_uniquename));
			
 
				-        }
			
 
				-        $feature = 0;
			
 
				-        unset($result);
			
 
				-      }
			
 
				- */
			
 
				-      // Add or update the feature and all properties.
			
 
				-      if ($update or $refresh or $add_only) {
			
 
				-
			
 
				-        // Add/update the feature.
			
 
				-        $feature = tripal_feature_load_gff3_feature($feature_organism, $analysis_id, $cvterm,
			
 
				-          $attr_uniquename, $attr_name, $residues, $attr_is_analysis,
			
 
				-          $attr_is_obsolete, $add_only, $score);
			
 
				-
			
 
				-        if ($feature) {
			
 
				-
			
 
				-          // Add a record for this feature to the tripal_gff_temp table for
			
 
				-          // later lookup.
			
 
				-          $values = array(
			
 
				-            'feature_id' => $feature->feature_id,
			
 
				-            'organism_id' => $feature->organism_id,
			
 
				-            'type_name' => $type,
			
 
				-            'uniquename' => $feature->uniquename
			
 
				-          );
			
 
				-          // make sure this record doesn't already exist in our temp table
			
 
				-          $results = chado_select_record('tripal_gff_temp', array('*'), $values);
			
 
				-
			
 
				-          if (count($results) == 0) {
			
 
				-            $result = chado_insert_record('tripal_gff_temp', $values);
			
 
				-            if (!$result) {
			
 
				-              tripal_report_error('tripal_chado', TRIPAL_ERROR, "Cound not save record in temporary table, Cannot continue.", array());
			
 
				-              exit;
			
 
				-            }
			
 
				-          }
			
 
				-          // add/update the featureloc if the landmark and the ID are not the same
			
 
				-          // if they are the same then this entry in the GFF is probably a landmark identifier
			
 
				-          if (strcmp($landmark, $attr_uniquename) !=0 ) {
			
 
				-            tripal_feature_load_gff3_featureloc($feature, $organism,
			
 
				-              $landmark, $fmin, $fmax, $strand, $phase, $attr_fmin_partial,
			
 
				-              $attr_fmax_partial, $attr_residue_info, $attr_locgroup);
			
 
				-          }
			
 
				-
			
 
				-          // add any aliases for this feature
			
 
				-          if (array_key_exists('Alias', $tags)) {
			
 
				-            tripal_feature_load_gff3_alias($feature, $tags['Alias']);
			
 
				-          }
			
 
				-          // add any dbxrefs for this feature
			
 
				-          if (array_key_exists('Dbxref', $tags)) {
			
 
				-            tripal_feature_load_gff3_dbxref($feature, $tags['Dbxref']);
			
 
				-          }
			
 
				-          // add any ontology terms for this feature
			
 
				-          if (array_key_exists('Ontology_term', $tags)) {
			
 
				-            tripal_feature_load_gff3_ontology($feature, $tags['Ontology_term']);
			
 
				-          }
			
 
				-          // add parent relationships
			
 
				-          if (array_key_exists('Parent', $tags)) {
			
 
				-            tripal_feature_load_gff3_parents($feature, $cvterm, $tags['Parent'],
			
 
				-              $feature_organism->organism_id, $strand, $phase, $fmin, $fmax);
			
 
				-          }
			
 
				-
			
 
				-          // add target relationships
			
 
				-          if (array_key_exists('Target', $tags)) {
			
 
				-            tripal_feature_load_gff3_target($feature, $tags, $target_organism_id, $target_type, $create_target, $attr_locgroup);
			
 
				-          }
			
 
				-          // add gap information.  This goes in simply as a property
			
 
				-          if (array_key_exists('Gap', $tags)) {
			
 
				-            foreach ($tags['Gap'] as $value) {
			
 
				-              tripal_feature_load_gff3_property($feature, 'Gap', $value);
			
 
				-            }
			
 
				-          }
			
 
				-          // add notes. This goes in simply as a property
			
 
				-          if (array_key_exists('Note', $tags)) {
			
 
				-            foreach ($tags['Note'] as $value) {
			
 
				-                tripal_feature_load_gff3_property($feature, 'Note', $value);
			
 
				-            }
			
 
				-          }
			
 
				-          // add the Derives_from relationship (e.g. polycistronic genes).
			
 
				-          if (array_key_exists('Derives_from', $tags)) {
			
 
				-            tripal_feature_load_gff3_derives_from($feature, $cvterm, $tags['Derives_from'][0],
			
 
				-              $feature_organism, $fmin, $fmax);
			
 
				-          }
			
 
				-          // add in the GFF3_source dbxref so that GBrowse can find the feature using the source column
			
 
				-          $source_ref = array('GFF_source:' . $source);
			
 
				-          tripal_feature_load_gff3_dbxref($feature, $source_ref);
			
 
				-          // add any additional attributes
			
 
				-          if ($attr_others) {
			
 
				-            foreach ($attr_others as $tag_name => $values) {
			
 
				-              foreach ($values as $value) {
			
 
				-                tripal_feature_load_gff3_property($feature, $tag_name, $value);
			
 
				-              }
			
 
				-            }
			
 
				-          }
			
 
				-
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    // Do some last bit of processing.
			
 
				-    if (!$remove) {
			
 
				-
			
 
				-      // First, add any protein sequences if needed.
			
 
				-      $sql = "SELECT feature_id FROM {tripal_gffcds_temp} LIMIT 1 OFFSET 1";
			
 
				-      $has_cds = chado_query($sql)->fetchField();
			
 
				-      if ($has_cds) {
			
 
				-        print "\nAdding protein sequences if CDS exist and no proteins in GFF...\n";
			
 
				-        $sql = "
			
 
				-          SELECT F.feature_id, F.name, F.uniquename, TGCT.strand,
			
 
				-            CVT.cvterm_id, CVT.name as feature_type,
			
 
				-            min(TGCT.fmin) as fmin, max(TGCT.fmax) as fmax,
			
 
				-            TGPT.feature_id as protein_id, TGPT.fmin as protein_fmin,
			
 
				-            TGPT.fmax as protein_fmax, FLM.uniquename as landmark
			
 
				-          FROM {tripal_gffcds_temp} TGCT
			
 
				-            INNER JOIN {feature} F on F.feature_id = TGCT.parent_id
			
 
				-            INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
			
 
				-            INNER JOIN {featureloc} L on F.feature_id = L.feature_id
			
 
				-            INNER JOIN {feature} FLM on L.srcfeature_id = FLM.feature_id
			
 
				-            LEFT JOIN {tripal_gffprotein_temp} TGPT on TGPT.parent_id = F.feature_id
			
 
				-          GROUP BY F.feature_id, F.name, F.uniquename, CVT.cvterm_id, CVT.name,
			
 
				-            TGPT.feature_id, TGPT.fmin, TGPT.fmax, TGCT.strand, FLM.uniquename
			
 
				-        ";
			
 
				-        $results = chado_query($sql);
			
 
				-        $protein_cvterm = tripal_get_cvterm(array(
			
 
				-          'name' => 'polypeptide',
			
 
				-          'cv_id' => array(
			
 
				-            'name' => 'sequence'
			
 
				-          )
			
 
				-        ));
			
 
				-        while ($result = $results->fetchObject()) {
			
 
				-          // If a protein exists with this same parent then don't add a new
			
 
				-          // protein.
			
 
				-          if (!$result->protein_id) {
			
 
				-            // Get details about this protein
			
 
				-            if ($re_mrna and $re_protein) {
			
 
				-              // We use a regex to generate protein name from mRNA name
			
 
				-              $uname = preg_replace("/$re_mrna/", $re_protein, $result->uniquename);
			
 
				-              $name =  $result->name;
			
 
				-            }
			
 
				-            else {
			
 
				-              // No regex, use the default '-protein' suffix
			
 
				-              $uname = $result->uniquename . '-protein';
			
 
				-              $name =  $result->name;
			
 
				-            }
			
 
				-            $values = array(
			
 
				-              'parent_id' => $result->feature_id,
			
 
				-              'fmin' => $result->fmin
			
 
				-            );
			
 
				-            $min_phase = chado_select_record('tripal_gffcds_temp', array('phase'), $values);
			
 
				-            $values = array(
			
 
				-              'parent_id' => $result->feature_id,
			
 
				-              'fmax' => $result->fmax
			
 
				-            );
			
 
				-            $max_phase = chado_select_record('tripal_gffcds_temp', array('phase'), $values);
			
 
				-
			
 
				-            $pfmin = $result->fmin;
			
 
				-            $pfmax = $result->fmax;
			
 
				-            if ($result->strand == '-1') {
			
 
				-              $pfmax -= $max_phase[0]->phase;
			
 
				-            }
			
 
				-            else {
			
 
				-              $pfmin += $min_phase[0]->phase;
			
 
				-            }
			
 
				-
			
 
				-            // Add the new protein record.
			
 
				-            $feature = tripal_feature_load_gff3_feature($organism, $analysis_id,
			
 
				-              $protein_cvterm, $uname, $name, '', 'f', 'f', 1, 0);
			
 
				-            // Add the derives_from relationship.
			
 
				-            $cvterm = tripal_get_cvterm(array('cvterm_id' => $result->cvterm_id));
			
 
				-            tripal_feature_load_gff3_derives_from($feature, $cvterm,
			
 
				-              $result->uniquename, $organism, $pfmin, $pfmax);
			
 
				-            // Add the featureloc record. Set the start of the protein to
			
 
				-            // be the start of the coding sequence minus the phase.
			
 
				-            tripal_feature_load_gff3_featureloc($feature, $organism, $result->landmark,
			
 
				-              $pfmin, $pfmax, $result->strand, '', 'f', 'f', '', 0);
			
 
				-          }
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      print "\nSetting ranks of children...\n";
			
 
				-
			
 
				-      // Get features in a relationship that are also children of an alignment.
			
 
				-      $sql = "
			
 
				-        SELECT DISTINCT F.feature_id, F.organism_id, F.type_id,
			
 
				-          F.uniquename, FL.strand
			
 
				-        FROM {tripal_gff_temp} TGT
			
 
				-          INNER JOIN {feature} F                ON TGT.feature_id = F.feature_id
			
 
				-          INNER JOIN {feature_relationship} FR  ON FR.object_id   = TGT.feature_id
			
 
				-          INNER JOIN {cvterm} CVT               ON CVT.cvterm_id  = FR.type_id
			
 
				-          INNER JOIN {featureloc} FL            ON FL.feature_id  = F.feature_id
			
 
				-        WHERE CVT.name = 'part_of'
			
 
				-      ";
			
 
				-      $parents = chado_query($sql);
			
 
				-
			
 
				-      // Build and prepare the SQL for selecting the children relationship.
			
 
				-      $sel_gffchildren_sql = "
			
 
				-        SELECT DISTINCT FR.feature_relationship_id, FL.fmin, FR.rank
			
 
				-        FROM {feature_relationship} FR
			
 
				-          INNER JOIN {featureloc} FL on FL.feature_id = FR.subject_id
			
 
				-          INNER JOIN {cvterm} CVT on CVT.cvterm_id = FR.type_id
			
 
				-        WHERE FR.object_id = :feature_id AND CVT.name = 'part_of'
			
 
				-        ORDER BY FL.fmin ASC
			
 
				-      ";
			
 
				-
			
 
				-      // Now set the rank of any parent/child relationships.  The order is based
			
 
				-      // on the fmin.  The start rank is 1.  This allows features with other
			
 
				-      // relationships to be '0' (the default), and doesn't interfer with the
			
 
				-      // ordering defined here.
			
 
				-      $num_recs = $parents->rowCount();
			
 
				-      $i = 1;
			
 
				-      $interval = intval($num_recs * 0.0001);
			
 
				-      if ($interval == 0) {
			
 
				-        $interval = 1;
			
 
				-      }
			
 
				-      $percent = sprintf("%.2f", ($i / $num_recs) * 100);
			
 
				-      print "Setting $i of $num_recs (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
			
 
				-
			
 
				-      while ($parent = $parents->fetchObject()) {
			
 
				-
			
 
				-        if ($i % $interval == 0) {
			
 
				-          $percent = sprintf("%.2f", ($i / $num_recs) * 100);
			
 
				-          print "Setting $i of $num_recs (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
			
 
				-        }
			
 
				-
			
 
				-        // get the children
			
 
				-        $result = chado_query($sel_gffchildren_sql, array(':feature_id' => $parent->feature_id));
			
 
				-
			
 
				-        // build an array of the children
			
 
				-        $children = array();
			
 
				-        while ($child = $result->fetchObject()) {
			
 
				-           $children[] = $child;
			
 
				-        }
			
 
				-
			
 
				-        // the children list comes sorted in ascending fmin
			
 
				-        // but if the parent is on the reverse strand we need to
			
 
				-        // reverse the order of the children.
			
 
				-        if ($parent->strand == -1) {
			
 
				-          arsort($children);
			
 
				-        }
			
 
				-
			
 
				-        // first set the ranks to a negative number so that we don't
			
 
				-        // get a duplicate error message when we try to change any of them
			
 
				-        $rank = -1;
			
 
				-        foreach ($children as $child) {
			
 
				-          $match = array('feature_relationship_id' => $child->feature_relationship_id);
			
 
				-          $values = array('rank' => $rank);
			
 
				-          chado_update_record('feature_relationship', $match, $values);
			
 
				-          $rank--;
			
 
				-        }
			
 
				-        // now set the rank correctly. The rank should start at 0.
			
 
				-        $rank = 0;
			
 
				-        foreach ($children as $child) {
			
 
				-          $match = array('feature_relationship_id' => $child->feature_relationship_id);
			
 
				-          $values = array('rank' => $rank);
			
 
				-          //print "Was: " . $child->rank . " now $rank ($parent->strand)\n"     ;
			
 
				-          chado_update_record('feature_relationship', $match, $values);
			
 
				-          $rank++;
			
 
				-        }
			
 
				-        $i++;
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  catch (Exception $e) {
			
 
				-    print "\n"; // make sure we start errors on new line
			
 
				-    if ($use_transaction) {
			
 
				-      $transaction->rollback();
			
 
				-      print "FAILED: Rolling back database changes...\n";
			
 
				-    }
			
 
				-    else {
			
 
				-      print "FAILED\n";
			
 
				-    }
			
 
				-    watchdog_exception('tripal_chado', $e);
			
 
				-    return 0;
			
 
				-  }
			
 
				-
			
 
				-  print "\nDone\n";
			
 
				-  return 1;
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Load the derives from attribute for a gff3 feature
			
 
				- *
			
 
				- * @param $feature
			
 
				- * @param $subject
			
 
				- * @param $organism
			
 
				- *
			
 
				- * @ingroup gff3_loader
			
 
				- */
			
 
				-function tripal_feature_load_gff3_derives_from($feature, $cvterm, $object,
			
 
				-  $organism, $fmin, $fmax) {
			
 
				-
			
 
				-  $type = $cvterm->name;
			
 
				-
			
 
				-  // First look for the object feature in the temp table to get it's type.
			
 
				-  $values = array(
			
 
				-    'organism_id' => $organism->organism_id,
			
 
				-    'uniquename' => $object,
			
 
				-  );
			
 
				-  $result = chado_select_record('tripal_gff_temp', array('type_name'), $values);
			
 
				-  $type_id = NULL;
			
 
				-  if (count($result) > 0) {
			
 
				-    $otype = tripal_get_cvterm(array(
			
 
				-      'name' => $result[0]->type_name,
			
 
				-      'cv_id' => array(
			
 
				-        'name' => 'sequence'
			
 
				-      )
			
 
				-    ));
			
 
				-    if ($otype) {
			
 
				-      $type_id = $otype->cvterm_id;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // If the object wasn't in the temp table then look for it in the
			
 
				-  // feature table and get it's type.
			
 
				-  if (!$type_id) {
			
 
				-    $result = chado_select_record('feature', array('type_id'), $values);
			
 
				-    if (count($result) > 1) {
			
 
				-      watchdog("tripal_chado", "Cannot find feature type for, '%subject' , in 'derives_from' relationship. Multiple matching features exist with this uniquename.",
			
 
				-        array('%subject' => $object), WATCHDOG_WARNING);
			
 
				-      return '';
			
 
				-    }
			
 
				-    else if (count($result) == 0) {
			
 
				-      watchdog("tripal_chado", "Cannot find feature type for, '%subject' , in 'derives_from' relationship.",
			
 
				-        array('%subject' => $object), WATCHDOG_WARNING);
			
 
				-      return '';
			
 
				-    }
			
 
				-    else {
			
 
				-      $type_id = $result->type_id;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // Get the object feature.
			
 
				-  $match = array(
			
 
				-    'organism_id' => $organism->organism_id,
			
 
				-    'uniquename' => $object,
			
 
				-    'type_id' => $type_id,
			
 
				-  );
			
 
				-  $ofeature = chado_select_record('feature', array('feature_id'), $match);
			
 
				-  if (count($ofeature) == 0) {
			
 
				-    tripal_report_error('tripal_chado', TRIPAL_ERROR, "Could not add 'Derives_from' relationship " .
			
 
				-      "for %uniquename and %subject.  Subject feature, '%subject', " .
			
 
				-      "cannot be found", array('%uniquename' => $feature->uniquename, '%subject' => $subject));
			
 
				-    return;
			
 
				-  }
			
 
				-
			
 
				-  // If this feature is a protein then add it to the tripal_gffprotein_temp.
			
 
				-  if ($type == 'protein' or $type == 'polypeptide') {
			
 
				-    $values = array(
			
 
				-      'feature_id' => $feature->feature_id,
			
 
				-      'parent_id' => $ofeature[0]->feature_id,
			
 
				-      'fmin' => $fmin,
			
 
				-      'fmax' => $fmax
			
 
				-    );
			
 
				-    $result = chado_insert_record('tripal_gffprotein_temp', $values);
			
 
				-    if (!$result) {
			
 
				-      tripal_report_error('tripal_chado', TRIPAL_ERROR, "Cound not save record in temporary protein table, Cannot continue.", array());
			
 
				-      exit;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-   // Now check to see if the relationship already exists. If it does
			
 
				-   // then just return.
			
 
				-  $values = array(
			
 
				-    'object_id' => $ofeature[0]->feature_id,
			
 
				-    'subject_id' => $feature->feature_id,
			
 
				-    'type_id' => array(
			
 
				-       'cv_id' => array(
			
 
				-          'name' => 'sequence'
			
 
				-        ),
			
 
				-       'name' => 'derives_from',
			
 
				-    ),
			
 
				-    'rank' => 0
			
 
				-  );
			
 
				-  $rel = chado_select_record('feature_relationship', array('*'), $values);
			
 
				-  if (count($rel) > 0) {
			
 
				-    return;
			
 
				-  }
			
 
				-
			
 
				-  // finally insert the relationship if it doesn't exist
			
 
				-  $ret = chado_insert_record('feature_relationship', $values);
			
 
				-  if (!$ret) {
			
 
				-    tripal_report_error("tripal_chado", TRIPAL_WARNING, "Could not add 'Derives_from' relationship for $feature->uniquename and $subject",
			
 
				-      array());
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Load the parents for a gff3 feature
			
 
				- *
			
 
				- * @param $feature
			
 
				- * @param $cvterm
			
 
				- * @param $parents
			
 
				- * @param $organism_id
			
 
				- * @param $fmin
			
 
				- *
			
 
				- * @ingroup gff3_loader
			
 
				- */
			
 
				-function tripal_feature_load_gff3_parents($feature, $cvterm, $parents,
			
 
				-  $organism_id, $strand, $phase, $fmin, $fmax) {
			
 
				-
			
 
				-  $uname = $feature->uniquename;
			
 
				-  $type = $cvterm->name;
			
 
				-  $rel_type = 'part_of';
			
 
				-
			
 
				-  // Prepare these SQL statements that will be used repeatedly.
			
 
				-  $cvterm_sql = "
			
 
				-    SELECT CVT.cvterm_id
			
 
				-    FROM {cvterm} CVT
			
 
				-      INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
			
 
				-      LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
			
 
				-    WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
			
 
				-  ";
			
 
				-
			
 
				-  // Iterate through the parents in the list.
			
 
				-  foreach ($parents as $parent) {
			
 
				-    // Get the parent cvterm.
			
 
				-    $values = array(
			
 
				-      'organism_id' => $organism_id,
			
 
				-      'uniquename' => $parent,
			
 
				-    );
			
 
				-    $result = chado_select_record('tripal_gff_temp', array('type_name'), $values);
			
 
				-    if (count($result) == 0) {
			
 
				-      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Cannot find parent: %parent", array('%parent' => $parent));
			
 
				-       return '';
			
 
				-    }
			
 
				-    $parent_type = $result[0]->type_name;
			
 
				-
			
 
				-    // try to find the parent
			
 
				-    $parentcvterm = chado_query($cvterm_sql, array(':cvname' => 'sequence', ':name' => $parent_type, ':synonym' => $parent_type))->fetchObject();
			
 
				-    $relcvterm = chado_query($cvterm_sql, array(':cvname' => 'sequence', ':name' => $rel_type, ':synonym' => $rel_type))->fetchObject();
			
 
				-    if (!$relcvterm) {
			
 
				-      tripal_report_error("tripal_feature", TRIPAL_WARNING, "Cannot find the term, 'part_of', from the sequence ontology. This term is used for associating parent and children features. Please check that the ontology is fully imported.");
			
 
				-      exit;
			
 
				-    }
			
 
				-    $values = array(
			
 
				-        'organism_id' => $organism_id,
			
 
				-        'uniquename' => $parent,
			
 
				-        'type_id' => $parentcvterm->cvterm_id,
			
 
				-    );
			
 
				-    $result = chado_select_record('feature', array('feature_id'), $values);
			
 
				-    $parent_feature = $result[0];
			
 
				-
			
 
				-    // if the parent exists then add the relationship otherwise print error and skip
			
 
				-    if ($parent_feature) {
			
 
				-
			
 
				-      // check to see if the relationship already exists
			
 
				-      $values = array(
			
 
				-        'object_id' => $parent_feature->feature_id,
			
 
				-        'subject_id' => $feature->feature_id,
			
 
				-        'type_id' => $relcvterm->cvterm_id,
			
 
				-      );
			
 
				-      $rel = chado_select_record('feature_relationship', array('*'), $values);
			
 
				-
			
 
				-      if (count($rel) > 0) {
			
 
				-      }
			
 
				-      else {
			
 
				-        // the relationship doesn't already exist, so add it.
			
 
				-        $values = array(
			
 
				-          'subject_id' => $feature->feature_id,
			
 
				-          'object_id'  => $parent_feature->feature_id,
			
 
				-          'type_id' => $relcvterm->cvterm_id,
			
 
				-        );
			
 
				-        $result = chado_insert_record('feature_relationship', $values);
			
 
				-        if (!$result) {
			
 
				-          tripal_report_error("tripal_chado", TRIPAL_WARNING, "Failed to insert feature relationship '$uname' ($type) $rel_type '$parent' ($parent_type)",
			
 
				-            array());
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      // If this feature is a CDS and now that we know the parent we can
			
 
				-      // add it to the tripal_gffcds_temp table for later lookup.
			
 
				-      if ($type == 'CDS') {
			
 
				-        $values = array(
			
 
				-          'feature_id' => $feature->feature_id,
			
 
				-          'parent_id' => $parent_feature->feature_id,
			
 
				-          'fmin' => $fmin,
			
 
				-          'fmax' => $fmax,
			
 
				-          'strand' => $strand,
			
 
				-        );
			
 
				-        if ($phase) {
			
 
				-         $values['phase'] = $phase;
			
 
				-        }
			
 
				-        $result = chado_insert_record('tripal_gffcds_temp', $values);
			
 
				-        if (!$result) {
			
 
				-          tripal_report_error('tripal_chado', TRIPAL_ERROR, "Cound not save record in temporary CDS table, Cannot continue.", array());
			
 
				-          exit;
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-    else {
			
 
				-      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Cannot establish relationship '$uname' ($type) $rel_type '$parent' ($parent_type): Cannot find the parent",
			
 
				-        array());
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Load the dbxref attribute for a feature
			
 
				- *
			
 
				- * @param $feature
			
 
				- * @param $dbxrefs
			
 
				- *
			
 
				- * @ingroup gff3_loader
			
 
				- */
			
 
				-function tripal_feature_load_gff3_dbxref($feature, $dbxrefs) {
			
 
				-
			
 
				-  // iterate through each of the dbxrefs
			
 
				-  foreach ($dbxrefs as $dbxref) {
			
 
				-
			
 
				-    // get the database name from the reference.  If it doesn't exist then create one.
			
 
				-    $ref = explode(":", $dbxref);
			
 
				-    $dbname = trim($ref[0]);
			
 
				-    $accession = trim($ref[1]);
			
 
				-
			
 
				-    // first look for the database name if it doesn't exist then create one.
			
 
				-    // first check for the fully qualified URI (e.g. DB:<dbname>. If that
			
 
				-    // can't be found then look for the name as is.  If it still can't be found
			
 
				-    // the create the database
			
 
				-    $values = array('name' => "DB:$dbname");
			
 
				-    $db = chado_select_record('db', array('db_id'), $values);
			
 
				-    if (count($db) == 0) {
			
 
				-      $values = array('name' => "$dbname");
			
 
				-      $db = chado_select_record('db', array('db_id'), $values);
			
 
				-    }
			
 
				-    if (count($db) == 0) {
			
 
				-      $values = array(
			
 
				-        'name' => $dbname,
			
 
				-        'description' => 'Added automatically by the GFF loader'
			
 
				-      );
			
 
				-      $success = chado_insert_record('db', $values);
			
 
				-      if ($success) {
			
 
				-        $values = array('name' => "$dbname");
			
 
				-        $db = chado_select_record('db', array('db_id'), $values);
			
 
				-      }
			
 
				-      else {
			
 
				-        tripal_report_error("tripal_chado", TRIPAL_WARNING, "Cannot find or add the database $dbname", array());
			
 
				-        return 0;
			
 
				-      }
			
 
				-    }
			
 
				-    $db = $db[0];
			
 
				-
			
 
				-    // now check to see if the accession exists
			
 
				-    $values = array(
			
 
				-      'accession' => $accession,
			
 
				-      'db_id' => $db->db_id
			
 
				-    );
			
 
				-    $dbxref = chado_select_record('dbxref', array('dbxref_id'), $values);
			
 
				-
			
 
				-    // if the accession doesn't exist then we want to add it
			
 
				-    if (sizeof($dbxref) == 0) {
			
 
				-      $values = array(
			
 
				-        'db_id' => $db->db_id,
			
 
				-        'accession' => $accession,
			
 
				-        'version' => ''
			
 
				-      );
			
 
				-      $ret = chado_insert_record('dbxref', $values);
			
 
				-      $values = array(
			
 
				-        'accession' => $accession,
			
 
				-        'db_id' => $db->db_id
			
 
				-      );
			
 
				-      $dbxref = chado_select_record('dbxref', array('dbxref_id'), $values);
			
 
				-    }
			
 
				-    $dbxref = $dbxref[0];
			
 
				-
			
 
				-    // check to see if this feature dbxref already exists
			
 
				-    $values = array(
			
 
				-      'dbxref_id' => $dbxref->dbxref_id,
			
 
				-      'feature_id' => $feature->feature_id
			
 
				-    );
			
 
				-    $fdbx = chado_select_record('feature_dbxref', array('feature_dbxref_id'), $values);
			
 
				-
			
 
				-    // now associate this feature with the database reference if it doesn't
			
 
				-    // already exist
			
 
				-    if (sizeof($fdbx) == 0) {
			
 
				-      $values = array(
			
 
				-        'dbxref_id' => $dbxref->dbxref_id,
			
 
				-        'feature_id' => $feature->feature_id
			
 
				-      );
			
 
				-      $success = chado_insert_record('feature_dbxref', $values);
			
 
				-      if (!$success) {
			
 
				-        tripal_report_error("tripal_chado", TRIPAL_WARNING, "Failed to insert Dbxref: $dbname:$accession", array());
			
 
				-        return 0;
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  return 1;
			
 
				-}
			
 
				-
			
 
				/**
				 * Associate ontology terms with a feature.
				 *
				 * Each reference has the form "DBNAME:ACCESSION". The database is looked up
				 * first as "DB:DBNAME" and then as "DBNAME". The accession must already exist
				 * as a dbxref of that database and must be linked to a cvterm, either directly
				 * (cvterm.dbxref_id) or as an alternate id (cvterm_dbxref). The only record
				 * this function creates is the feature_cvterm link.
				 *
				 * @param $feature
				 *   The feature record; only $feature->feature_id is read.
				 * @param $dbxrefs
				 *   An array of "DBNAME:ACCESSION" strings.
				 *
				 * @return
				 *   1 if every term is linked (or was already linked); 0 as soon as one
				 *   reference cannot be resolved or inserted.
				 *
				 * @ingroup gff3_loader
				 */
				function tripal_feature_load_gff3_ontology($feature, $dbxrefs) {

				  foreach ($dbxrefs as $reference) {

				    // Split on the first colon only so that an accession which itself
				    // contains a colon is kept intact.
				    $ref = explode(":", $reference, 2);
				    if (count($ref) < 2) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING,
				        "Ontology term '%term' is not in the form DBNAME:ACCESSION. Cannot associate term.",
				        array('%term' => $reference));
				      return 0;
				    }
				    $dbname = trim($ref[0]);
				    $accession = trim($ref[1]);

				    // Look for the database, first with the 'DB:' prefix and then without.
				    $db = chado_select_record('db', array('db_id'), array('name' => "DB:$dbname"));
				    if (sizeof($db) == 0) {
				      $db = chado_select_record('db', array('db_id'), array('name' => "$dbname"));
				      if (sizeof($db) == 0) {
				        tripal_report_error("tripal_chado", TRIPAL_WARNING, "Database, $dbname, is not present. Cannot associate term: $dbname:$accession", array());
				        return 0;
				      }
				    }
				    $db = $db[0];

				    // The accession must already exist for that database.
				    $dbxref_recs = chado_select_record('dbxref', array('dbxref_id'),
				      array('accession' => $accession, 'db_id' => $db->db_id));
				    if (sizeof($dbxref_recs) == 0) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Accession, $accession is missing for reference: $dbname:$accession", array());
				      return 0;
				    }
				    $dbxref_rec = $dbxref_recs[0];

				    // Find the cvterm by its primary dbxref, falling back to an alternate id.
				    $cvterm_recs = chado_select_record('cvterm', array('cvterm_id'),
				      array('dbxref_id' => $dbxref_rec->dbxref_id));
				    if (sizeof($cvterm_recs) == 0) {
				      $cvterm_recs = chado_select_record('cvterm_dbxref', array('cvterm_id'),
				        array('dbxref_id' => $dbxref_rec->dbxref_id));
				      if (sizeof($cvterm_recs) == 0) {
				        tripal_report_error("tripal_chado", TRIPAL_WARNING, "CV Term is missing for reference: $dbname:$accession", array());
				        return 0;
				      }
				    }
				    $cvterm = $cvterm_recs[0];

				    // Link the feature to the cvterm unless the link already exists.
				    $fcvt = chado_select_record('feature_cvterm', array('feature_cvterm_id'),
				      array('cvterm_id' => $cvterm->cvterm_id, 'feature_id' => $feature->feature_id));
				    if (sizeof($fcvt) == 0) {
				      $values = array(
				        'cvterm_id' => $cvterm->cvterm_id,
				        'feature_id' => $feature->feature_id,
				        // The link is attributed to the publication whose uniquename is 'null'.
				        'pub_id' => array(
				          'uniquename' => 'null',
				        ),
				      );
				      $success = chado_insert_record('feature_cvterm', $values);
				      if (!$success) {
				        tripal_report_error("tripal_chado", TRIPAL_WARNING, "Failed to insert ontology term: $dbname:$accession", array());
				        return 0;
				      }
				    }
				  }
				  return 1;
				}
			
 
				-
			
 
				/**
				 * Load any aliases for a feature.
				 *
				 * Ensures the 'synonym_type' vocabulary and its 'exact' term exist, then for
				 * each alias makes sure there is a synonym record and a feature_synonym link.
				 * Links are attributed to the publication whose uniquename is 'null', which
				 * is created on first use if missing.
				 *
				 * @param $feature
				 *   The feature record; only $feature->feature_id is read.
				 * @param $aliases
				 *   An array of alias strings.
				 *
				 * @return
				 *   1 on success, 0 as soon as any required record cannot be found or added.
				 *
				 * @ingroup gff3_loader
				 */
				function tripal_feature_load_gff3_alias($feature, $aliases) {

				  // Make sure we have a 'synonym_type' vocabulary.
				  $select = array('name' => 'synonym_type');
				  $results = chado_select_record('cv', array('*'), $select);
				  if (count($results) == 0) {
				    $values = array(
				      'name' => 'synonym_type',
				      'definition' => 'vocabulary for synonym types',
				    );
				    $success = chado_insert_record('cv', $values);
				    // Re-read the record so that we have the full row.
				    $results = $success ? chado_select_record('cv', array('*'), $select) : array();
				    if (count($results) == 0) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Failed to add the synonyms type vocabulary", array());
				      return 0;
				    }
				  }
				  $syncv = $results[0];

				  // Get the 'exact' cvterm, which is the type of synonym we're adding.
				  $select = array(
				    'name' => 'exact',
				    'cv_id' => array(
				      'name' => 'synonym_type',
				    ),
				  );
				  $result = chado_select_record('cvterm', array('*'), $select);
				  if (count($result) == 0) {
				    $term = array(
				      'name' => 'exact',
				      'id' => "synonym_type:exact",
				      'definition' => '',
				      'is_obsolete' => 0,
				      'cv_name' => $syncv->name,
				      'is_relationship' => FALSE,
				    );
				    $syntype = tripal_insert_cvterm($term, array('update_existing' => TRUE));
				    if (!$syntype) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Cannot add synonym type: synonym_type:exact", array());
				      return 0;
				    }
				  }
				  else {
				    $syntype = $result[0];
				  }

				  // The 'null' publication is the same for every alias, so it is looked up
				  // (and created if needed) only once, when the first alias is processed.
				  $pub = NULL;

				  foreach ($aliases as $alias) {

				    // Make sure the alias exists in the synonym table.
				    $select = array(
				      'name' => $alias,
				      'type_id' => $syntype->cvterm_id,
				    );
				    $result = chado_select_record('synonym', array('*'), $select);
				    if (count($result) == 0) {
				      $values = array(
				        'name' => $alias,
				        'type_id' => $syntype->cvterm_id,
				        'synonym_sgml' => '',
				      );
				      $success = chado_insert_record('synonym', $values);
				      $result = $success ? chado_select_record('synonym', array('*'), $select) : array();
				      if (count($result) == 0) {
				        tripal_report_error("tripal_chado", TRIPAL_WARNING, "Cannot add alias $alias to synonym table", array());
				        return 0;
				      }
				    }
				    $synonym = $result[0];

				    // Make sure we have the 'null' publication in the pub table.
				    if (!$pub) {
				      $pub_select = array('uniquename' => 'null');
				      $result = chado_select_record('pub', array('*'), $pub_select);
				      if (count($result) == 0) {
				        $pub_sql = "
				          INSERT INTO {pub} (uniquename,type_id)
				          VALUES (:uname,
				            (SELECT cvterm_id
				             FROM {cvterm} CVT
				               INNER JOIN {dbxref} DBX ON DBX.dbxref_id = CVT.dbxref_id
				               INNER JOIN {db} DB      ON DB.db_id      = DBX.db_id
				             WHERE CVT.name = :type_id))
				        ";
				        // An INSERT returns no rows, so success is judged by the statement
				        // result and by re-selecting the record, not by fetching a row.
				        $status = chado_query($pub_sql, array(':uname' => 'null', ':type_id' => 'null'));
				        $result = $status ? chado_select_record('pub', array('*'), $pub_select) : array();
				        if (count($result) == 0) {
				          tripal_report_error("tripal_chado", TRIPAL_WARNING, "Cannot add null publication needed for setup of alias", array());
				          return 0;
				        }
				      }
				      $pub = $result[0];
				    }

				    // Link the synonym to the feature unless the link already exists.
				    $values = array(
				      'synonym_id' => $synonym->synonym_id,
				      'feature_id' => $feature->feature_id,
				      'pub_id' => $pub->pub_id,
				    );
				    $result = chado_select_record('feature_synonym', array('feature_synonym_id'), $values);
				    if (count($result) == 0) {
				      $success = chado_insert_record('feature_synonym', $values);
				      if (!$success) {
				        tripal_report_error("tripal_chado", TRIPAL_WARNING, "Cannot add alias $alias to feature synonym table", array());
				        return 0;
				      }
				    }
				  }
				  return 1;
				}
			
 
				-
			
 
				/**
				 * Create or update the feature record and link it to its analysis.
				 *
				 * The feature is identified by organism, uniquename and type. If it does not
				 * exist it is inserted; if it exists and $add_only is not set it is updated;
				 * if it exists and $add_only is set it is returned untouched and the
				 * analysisfeature link is not examined.
				 *
				 * @param $organism
				 *   The organism record; only $organism->organism_id is read.
				 * @param $analysis_id
				 *   The analysis to link the feature to.
				 * @param $cvterm
				 *   The feature type; $cvterm->cvterm_id and $cvterm->name are read.
				 * @param $uniquename
				 *   The feature uniquename.
				 * @param $name
				 *   The feature name.
				 * @param $residues
				 *   The sequence; only its md5 checksum is stored here.
				 * @param $is_analysis
				 *   't', 'true', 'TRUE', 1, '1' or TRUE for true; anything else is false.
				 * @param $is_obsolete
				 *   Same convention as $is_analysis.
				 * @param $add_only
				 *   When truthy, existing records are never updated.
				 * @param $score
				 *   Stored as analysisfeature.significance unless it is '.'.
				 *
				 * @return
				 *   The feature record (an object with at least feature_id), or 0 if the
				 *   feature could not be inserted or updated.
				 *
				 * @ingroup gff3_loader
				 */
				function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uniquename,
				  $name, $residues, $is_analysis = 'f', $is_obsolete = 'f', $add_only, $score) {

				  // Check to see if the feature already exists.
				  $feature = NULL;
				  $match = array(
				    'organism_id' => $organism->organism_id,
				    'uniquename' => $uniquename,
				    'type_id' => $cvterm->cvterm_id,
				  );
				  $columns = array('feature_id', 'name', 'uniquename', 'seqlen', 'organism_id', 'type_id');
				  $result = chado_select_record('feature', $columns, $match);
				  if (count($result) > 0) {
				    $feature = $result[0];
				  }

				  // Normalize the flags with strict comparisons. A loose "== 0" test treats
				  // the string 't' as zero on PHP versions before 8, which turned a true
				  // flag into 'FALSE'.
				  $true_values = array('t', 'true', 'TRUE', TRUE, 1, '1');
				  $is_obsolete = in_array($is_obsolete, $true_values, TRUE) ? 'TRUE' : 'FALSE';
				  $is_analysis = in_array($is_analysis, $true_values, TRUE) ? 'TRUE' : 'FALSE';

				  if (!$feature) {
				    // The feature does not exist, so insert it.
				    $values = array(
				      'organism_id' => $organism->organism_id,
				      'name' => $name,
				      'uniquename' => $uniquename,
				      'md5checksum' => md5($residues),
				      'type_id' => $cvterm->cvterm_id,
				      'is_analysis' => $is_analysis,
				      'is_obsolete' => $is_obsolete,
				    );
				    // Test the raw result before casting: (object) FALSE is a non-empty
				    // object and would hide the failure.
				    $inserted = chado_insert_record('feature', $values);
				    if (!$inserted) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Failed to insert feature '$uniquename' ($cvterm->name)", array());
				      return 0;
				    }
				    $feature = (object) $inserted;
				  }
				  elseif (!$add_only) {
				    // The feature exists and updates are allowed.
				    $values = array(
				      'name' => $name,
				      'md5checksum' => md5($residues),
				      'is_analysis' => $is_analysis,
				      'is_obsolete' => $is_obsolete,
				    );
				    $result = chado_update_record('feature', $match, $values);
				    if (!$result) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Failed to update feature '$uniquename' ($cvterm->name)", array());
				      return 0;
				    }
				  }
				  else {
				    // The feature exists and we must not update it, so return the existing
				    // record without touching the analysisfeature link.
				    return $feature;
				  }

				  // Add or refresh the analysisfeature link.
				  $af_values = array(
				    'analysis_id' => $analysis_id,
				    'feature_id' => $feature->feature_id,
				  );
				  $has_score = (strcmp($score, '.') != 0);
				  $afeature = chado_select_record('analysisfeature', array('analysisfeature_id'), $af_values);
				  if (count($afeature) == 0) {
				    // If a score is available then store it as the significance.
				    if ($has_score) {
				      $af_values['significance'] = $score;
				    }
				    if (!chado_insert_record('analysisfeature', $af_values)) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Could not add analysisfeature record: $analysis_id, $feature->feature_id", array());
				    }
				  }
				  elseif (!$add_only) {
				    // Overwrite the significance, clearing it when no score is given.
				    $new_vals = array(
				      'significance' => $has_score ? $score : '__NULL__',
				    );
				    $ret = chado_update_record('analysisfeature', $af_values, $new_vals);
				    if (!$ret) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Could not update analysisfeature record: $analysis_id, $feature->feature_id", array());
				    }
				  }

				  return $feature;
				}
			
 
				-
			
 
				/**
				 * Insert or update the location of a feature on a landmark.
				 *
				 * The landmark (source feature) is resolved by uniquename, then by name, and,
				 * when $landmark_is_target is set, by uniquename across all organisms and
				 * types. If it still cannot be found it is created when $create_landmark and
				 * $landmark_type_id are both set. An existing featureloc on a source feature
				 * with the landmark's name that shares fmin or fmax is updated in place;
				 * otherwise a new featureloc is inserted at the next rank.
				 *
				 * @param $feature
				 *   The feature record; only $feature->feature_id is read.
				 * @param $organism
				 *   The organism record, used when $landmark_organism_id is empty.
				 * @param $landmark
				 *   The uniquename or name of the source feature.
				 * @param $fmin
				 *   The featureloc.fmin value.
				 * @param $fmax
				 *   The featureloc.fmax value.
				 * @param $strand
				 *   The featureloc.strand value.
				 * @param $phase
				 *   The featureloc.phase value; stored only when numeric (0 included).
				 * @param $is_fmin_partial
				 *   'f' or an empty value means false; any other value means true.
				 * @param $is_fmax_partial
				 *   Same convention as $is_fmin_partial.
				 * @param $residue_info
				 *   The featureloc.residue_info value.
				 * @param $locgroup
				 *   The featureloc.locgroup value.
				 * @param $landmark_type_id
				 *   Optional cvterm_id restricting the landmark lookup by type.
				 * @param $landmark_organism_id
				 *   Optional organism_id of the landmark.
				 * @param $create_landmark
				 *   When truthy, create the landmark if it cannot be found.
				 * @param $landmark_is_target
				 *   When truthy, also try the uniquename without organism or type.
				 *
				 * @return
				 *   1 on success, 0 on failure.
				 *
				 * @ingroup gff3_loader
				 */
				function tripal_feature_load_gff3_featureloc($feature, $organism, $landmark, $fmin,
				  $fmax, $strand, $phase, $is_fmin_partial, $is_fmax_partial, $residue_info, $locgroup,
				  $landmark_type_id = '', $landmark_organism_id = '', $create_landmark = 0,
				  $landmark_is_target = 0) {

				  $src_organism_id = $landmark_organism_id ? $landmark_organism_id : $organism->organism_id;

				  // Look for the landmark by uniquename first.
				  $select = array(
				    'organism_id' => $src_organism_id,
				    'uniquename' => $landmark,
				  );
				  if ($landmark_type_id) {
				    $select['type_id'] = $landmark_type_id;
				  }
				  $results = chado_select_record('feature', array('feature_id'), $select);

				  // If the uniquename did not match, try the name.
				  if (count($results) == 0) {
				    $select = array(
				      'organism_id' => $src_organism_id,
				      'name' => $landmark,
				    );
				    if ($landmark_type_id) {
				      $select['type_id'] = $landmark_type_id;
				    }
				    $results = chado_select_record('feature', array('feature_id'), $select);
				  }

				  if (count($results) > 1) {
				    tripal_report_error("tripal_chado", TRIPAL_WARNING,
				      "multiple landmarks exist with the name: '%landmark'.  Cannot resolve which one to use. Cannot add the feature location record",
				      array('%landmark' => $landmark));
				    return 0;
				  }

				  $srcfeature = NULL;
				  if (count($results) == 1) {
				    $srcfeature = $results[0];
				  }
				  elseif ($landmark_is_target) {
				    // The landmark is the target of a match alignment, so try once more with
				    // any feature that has this uniquename. Use it only if it is unambiguous.
				    $results = chado_select_record('feature', array('feature_id'), array('uniquename' => $landmark));
				    if (count($results) == 1) {
				      $srcfeature = $results[0];
				    }
				  }

				  if (!$srcfeature) {
				    // The landmark can only be created on request and when its type is known.
				    if (!($create_landmark and $landmark_type_id)) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Cannot find unique landmark feature: '%landmark'.",
				        array('%landmark' => $landmark));
				      return 0;
				    }
				    $values = array(
				      'organism_id' => $src_organism_id,
				      'name' => $landmark,
				      'uniquename' => $landmark,
				      'type_id' => $landmark_type_id,
				    );
				    $inserted = chado_insert_record('feature', $values);
				    if (!$inserted) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Cannot find landmark feature: '%landmark', nor could it be inserted",
				        array('%landmark' => $landmark));
				      return 0;
				    }
				    $srcfeature = new stdClass();
				    $srcfeature->feature_id = $inserted['feature_id'];
				  }

				  // Check whether this featureloc already exists, and keep track of the last
				  // rank value so that a new record can be appended after it.
				  $rank = 0;
				  $exists = 0;
				  $options = array(
				    'order_by' => array(
				      'rank' => 'ASC',
				    ),
				  );
				  $locrecs = chado_select_record('featureloc', array('*'),
				    array('feature_id' => $feature->feature_id), $options);

				  foreach ($locrecs as $featureloc) {
				    // featureloc.srcfeature_id may be NULL when the source feature is not
				    // known; such entries are skipped.
				    if (!$featureloc->srcfeature_id) {
				      continue;
				    }
				    $locsfeature = chado_select_record('feature', array('feature_id', 'name'),
				      array('feature_id' => $featureloc->srcfeature_id));

				    // The source feature name must match and either fmin or fmax must be
				    // unchanged for this to count as the same location. The fields that
				    // differ are then updated in place.
				    // NOTE(review): the comparison uses the source feature's name even when
				    // the landmark was resolved by uniquename - confirm this is intended.
				    $same_landmark = (count($locsfeature) > 0 and strcmp($locsfeature[0]->name, $landmark) == 0);
				    if ($same_landmark and ($featureloc->fmin == $fmin or $featureloc->fmax == $fmax)) {
				      $exists = 1;
				      $values = array();
				      if ($featureloc->fmin != $fmin) {
				        $values['fmin'] = $fmin;
				      }
				      if ($featureloc->fmax != $fmax) {
				        $values['fmax'] = $fmax;
				      }
				      if ($featureloc->strand != $strand) {
				        $values['strand'] = $strand;
				      }
				      if (count($values) > 0) {
				        chado_update_record('featureloc', array('featureloc_id' => $featureloc->featureloc_id), $values);
				      }
				    }
				    $rank = $featureloc->rank + 1;
				  }

				  if (!$exists) {
				    // This feature location is new, so add it. 'f' and empty values mean
				    // false; anything else means true.
				    $is_fmin_partial = ($is_fmin_partial === 'f' or !$is_fmin_partial) ? 'FALSE' : 'TRUE';
				    $is_fmax_partial = ($is_fmax_partial === 'f' or !$is_fmax_partial) ? 'FALSE' : 'TRUE';

				    $values = array(
				      'feature_id'      => $feature->feature_id,
				      'srcfeature_id'   => $srcfeature->feature_id,
				      'fmin'            => $fmin,
				      'is_fmin_partial' => $is_fmin_partial,
				      'fmax'            => $fmax,
				      'is_fmax_partial' => $is_fmax_partial,
				      'strand'          => $strand,
				      'residue_info'    => $residue_info,
				      'locgroup'        => $locgroup,
				      'rank'            => $rank,
				    );
				    // Phase 0 is a valid phase, so test for a numeric value rather than for
				    // truthiness, which would silently drop it.
				    if (is_numeric($phase)) {
				      $values['phase'] = $phase;
				    }
				    $success = chado_insert_record('featureloc', $values);
				    if (!$success) {
				      // Report the failure to the caller instead of terminating the process.
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Failed to insert featureloc", array());
				      return 0;
				    }
				  }
				  return 1;
				}
			
 
				-
			
 
				/**
				 * Load a property (featureprop) for the feature.
				 *
				 * The property type is a cvterm in the 'feature_property' vocabulary and is
				 * created if it does not exist. The value is added at the next free rank
				 * unless the feature already has the same value for this property type.
				 *
				 * @param $feature
				 *   The feature record; only $feature->feature_id is read.
				 * @param $property
				 *   The property (cvterm) name.
				 * @param $value
				 *   The property value.
				 *
				 * @return
				 *   0 if the property cvterm could not be added; otherwise nothing. A failed
				 *   featureprop insert is reported but not returned.
				 *
				 * @ingroup gff3_loader
				 */
				function tripal_feature_load_gff3_property($feature, $property, $value) {

				  // First make sure the cvterm exists.  If not, then add it.
				  $select = array(
				    'name' => $property,
				    'cv_id' => array(
				      'name' => 'feature_property',
				    ),
				  );
				  $result = chado_select_record('cvterm', array('*'), $select);
				  if (count($result) > 0) {
				    $cvterm = $result[0];
				  }
				  else {
				    $term = array(
				      'id' => "null:$property",
				      'name' => $property,
				      'namespace' => 'feature_property',
				      'is_obsolete' => 0,
				      'cv_name' => 'feature_property',
				      'is_relationship' => FALSE,
				    );
				    // Test the raw result before casting: (object) FALSE is a non-empty
				    // object and would hide the failure.
				    $inserted = tripal_insert_cvterm($term, array('update_existing' => FALSE));
				    if (!$inserted) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "Cannot add cvterm, $property", array());
				      return 0;
				    }
				    $cvterm = (object) $inserted;
				  }

				  // Walk the existing values of this property in rank order. A new value goes
				  // in at the rank after the last one; a value that is already present is
				  // not added again.
				  $add = 1;
				  $rank = 0;
				  $select = array(
				    'feature_id' => $feature->feature_id,
				    'type_id' => $cvterm->cvterm_id,
				  );
				  $options = array(
				    'order_by' => array(
				      'rank' => 'ASC',
				    ),
				  );
				  $results = chado_select_record('featureprop', array('*'), $select, $options);
				  foreach ($results as $prop) {
				    if (strcmp($prop->value, $value) == 0) {
				      $add = NULL;
				    }
				    $rank = $prop->rank + 1;
				  }

				  // Add the property if we pass the check above.
				  if ($add) {
				    $values = array(
				      'feature_id' => $feature->feature_id,
				      'type_id' => $cvterm->cvterm_id,
				      'value' => $value,
				      'rank' => $rank,
				    );
				    $result = chado_insert_record('featureprop', $values);
				    if (!$result) {
				      tripal_report_error("tripal_chado", TRIPAL_WARNING, "cannot add featureprop, $property", array());
				    }
				  }
				}
			
 
				-
			
 
				/**
				 * Load the FASTA sequences at the bottom of a GFF3 file.
				 *
				 * Reads the rest of the file handle. Each definition line (">ID ...") starts a
				 * new sequence whose ID is the text up to the first whitespace. When a
				 * sequence is complete its feature is looked up by uniquename in the
				 * tripal_gff_temp table and the feature's residues and seqlen are updated.
				 *
				 * @param $fh
				 *   The open file handle, positioned at the first FASTA line.
				 * @param $interval
				 *   The number of characters to read between job progress updates.
				 * @param $num_read
				 *   Running count of characters read from the file (updated).
				 * @param $intv_read
				 *   Characters read since the last progress update (updated).
				 * @param $line_num
				 *   The current line number (updated).
				 * @param $filesize
				 *   The file size, used to compute the percent complete.
				 * @param $job
				 *   The job to report progress to, or an empty value for none.
				 *
				 * @ingroup gff3_loader
				 */
				function tripal_feature_load_gff3_fasta($fh, $interval, &$num_read, &$intv_read, &$line_num, $filesize, $job) {
				  print "\nLoading FASTA sequences\n";
				  $residues = '';
				  $id = NULL;

				  // Guard the division so an empty or unknown file size cannot cause a
				  // division by zero.
				  $fraction = $filesize > 0 ? $num_read / $filesize : 0;
				  $percent = sprintf("%.2f", $fraction * 100);
				  print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";

				  // Iterate through the remaining lines of the file. The end of the file is
				  // handled like a definition line so that the last sequence is saved by the
				  // same code as every other one.
				  while (TRUE) {
				    $line = fgets($fh);
				    $at_end = ($line === FALSE);

				    if (!$at_end) {
				      $line_num++;
				      $size = drupal_strlen($line);
				      $num_read += $size;
				      $intv_read += $size;
				      $line = trim($line);

				      // Update the job status each time another interval has been read.
				      if ($job and $intv_read >= $interval) {
				        $intv_read = 0;
				        $fraction = $filesize > 0 ? $num_read / $filesize : 0;
				        $percent = sprintf("%.2f", $fraction * 100);
				        print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
				        tripal_set_job_progress($job, intval($fraction * 100));
				      }

				      if (!preg_match('/^>/', $line)) {
				        // A sequence line: append it to the current sequence.
				        $residues .= $line;
				        continue;
				      }
				    }

				    // We reached a new definition line or the end of the file, so save the
				    // sequence we just finished, if there is one.
				    if ($id !== NULL and $id !== '') {
				      $result = chado_select_record('tripal_gff_temp', array('*'), array('uniquename' => $id));
				      if (count($result) == 0) {
				        tripal_report_error('tripal_chado', TRIPAL_WARNING, 'Cannot find feature to assign FASTA sequence: %uname',
				          array('%uname' => $id));
				      }
				      else {
				        // We have a feature, so add the residues.
				        $feature = $result[0];
				        $values = array(
				          'residues' => $residues,
				          'seqlen' => strlen($residues),
				        );
				        chado_update_record('feature', array('feature_id' => $feature->feature_id), $values);
				      }
				    }

				    if ($at_end) {
				      break;
				    }

				    // Start the next sequence. Its ID is the text after '>' up to the first
				    // whitespace.
				    $id = preg_replace('/^>([^\s]+).*$/', '\1', $line);
				    $residues = '';
				  }
				}
			
 
				-
			
 
				-/**
				- * Load the Target attribute of a GFF3 record.
				- *
				- * Parses the Target attribute ("target_id start end [strand]"), resolves the
				- * organism and type of the target feature and then hands the alignment to
				- * tripal_feature_load_gff3_featureloc().
				- *
				- * @param $feature
				- *   The feature the Target attribute belongs to. Passed through to
				- *   tripal_feature_load_gff3_featureloc().
				- * @param $tags
				- *   The attributes of the GFF3 record keyed by tag name; each value is an
				- *   array of attribute values. The 'Target' key is read and, when present,
				- *   the 'target_organism' (genus:species) and 'target_type' keys.
				- * @param $target_organism_id
				- *   The default organism_id of the target. A target_organism attribute
				- *   overrides it (an unresolvable attribute results in an empty value).
				- * @param $target_type
				- *   The default type of the target: the name of a term in the 'sequence'
				- *   vocabulary. A target_type attribute overrides it (an unresolvable
				- *   attribute results in an empty value).
				- * @param $create_target
				- *   Passed through to tripal_feature_load_gff3_featureloc().
				- * @param $attr_locgroup
				- *   Passed through to tripal_feature_load_gff3_featureloc().
				- *
				- * @ingroup gff3_loader
				- */
				-function tripal_feature_load_gff3_target($feature, $tags, $target_organism_id, $target_type, $create_target, $attr_locgroup) {
				-  // format is: "target_id start end [strand]", where strand is optional and may be "+" or "-"
				-  $matched = preg_match('/^(.*?)\s+(\d+)\s+(\d+)(\s+[\+|\-])*$/', trim($tags['Target'][0]), $matches);
				-
				-  // the target attribute is not correctly formatted
				-  if (!$matched) {
				-    tripal_report_error('tripal_chado', TRIPAL_ERROR, "Could not add 'Target' alignment as it is improperly formatted:  '%target'",
				-      array('%target' => $tags['Target'][0]));
				-    return;
				-  }
				-
				-  // the organism and type of the target may also be specified as an attribute. If so, then get that
				-  // information
				-  $gff_target_organism = array_key_exists('target_organism', $tags) ? $tags['target_organism'][0] : '';
				-  $gff_target_type = array_key_exists('target_type', $tags) ? $tags['target_type'][0] : '';
				-
				-  $target_feature = $matches[1];
				-  $start = $matches[2];
				-  $end = $matches[3];
				-
				-  // Convert the optional strand to a numeric value. preg_match() leaves
				-  // trailing groups that did not participate out of $matches, so the strand
				-  // entry must be tested with isset() before it is read.
				-  $strand = isset($matches[4]) ? trim($matches[4]) : '';
				-  $target_strand = 0;
				-  if ($strand === '+') {
				-    $target_strand = 1;
				-  }
				-  elseif ($strand === '-') {
				-    $target_strand = -1;
				-  }
				-
				-  // GFF3 coordinates are 1-based; fmin is stored 0-based. Swap the
				-  // coordinates if they were given in reverse order.
				-  $target_fmin = $start - 1;
				-  $target_fmax = $end;
				-  if ($end < $start) {
				-    $target_fmin = $end - 1;
				-    $target_fmax = $start;
				-  }
				-
				-  // default the target organism to be the value passed into the function, but if the GFF
				-  // file specifies the target organism then use that instead.
				-  $t_organism_id = $target_organism_id;
				-  if ($gff_target_organism) {
				-    // get the genus and species
				-    $org_matches = array();
				-    if (preg_match('/^(.*?):(.*?)$/', $gff_target_organism, $org_matches)) {
				-      $values = array(
				-        'genus' => $org_matches[1],
				-        'species' => $org_matches[2],
				-      );
				-      $torganism = chado_select_record('organism', array('organism_id'), $values);
				-      if (count($torganism) == 1) {
				-        $t_organism_id = $torganism[0]->organism_id;
				-      }
				-      else {
				-        tripal_report_error('tripal_chado', TRIPAL_WARNING, "Cannot find organism for target %target.",
				-          array('%target' => $gff_target_organism));
				-        $t_organism_id = '';
				-      }
				-    }
				-    else {
				-      tripal_report_error('tripal_chado', TRIPAL_WARNING, "The target_organism attribute is improperly formatted: %target.
				-          It should be target_organism=genus:species.",
				-        array('%target' => $gff_target_organism));
				-      $t_organism_id = '';
				-    }
				-  }
				-
				-  // default the target type to be the value passed into the function, but if the GFF file
				-  // specifies the target type then use that instead
				-  $t_type_id = '';
				-  if ($target_type) {
				-    $values = array(
				-      'name' => $target_type,
				-      'cv_id' => array(
				-         'name' => 'sequence',
				-      )
				-    );
				-    $type = chado_select_record('cvterm', array('cvterm_id'), $values);
				-    if (count($type) == 1) {
				-      $t_type_id = $type[0]->cvterm_id;
				-    }
				-    else {
				-      tripal_report_error('tripal_chado', TRIPAL_ERROR, "The target type does not exist in the sequence ontology: %type. ",
				-        array('%type' => $target_type));
				-      exit;
				-    }
				-  }
				-  if ($gff_target_type) {
				-    $values = array(
				-      'name' => $gff_target_type,
				-      'cv_id' => array(
				-         'name' => 'sequence',
				-      )
				-    );
				-
				-    // get the cvterm_id for the target type
				-    $type = chado_select_record('cvterm', array('cvterm_id'), $values);
				-    if (count($type) == 1) {
				-      $t_type_id = $type[0]->cvterm_id;
				-    }
				-    else {
				-      // check to see if this is a synonym
				-      $sql = "
				-          SELECT CVTS.cvterm_id
				-          FROM {cvtermsynonym} CVTS
				-            INNER JOIN {cvterm} CVT ON CVT.cvterm_id = CVTS.cvterm_id
				-            INNER JOIN {cv} CV      ON CV.cv_id = CVT.cv_id
				-          WHERE CV.name = 'sequence' and CVTS.synonym = :synonym
				-        ";
				-      $synonym = chado_query($sql, array(':synonym' => $gff_target_type))->fetchObject();
				-      if ($synonym) {
				-        $t_type_id = $synonym->cvterm_id;
				-      }
				-      else {
				-        tripal_report_error('tripal_chado', TRIPAL_WARNING, "The target_type attribute does not exist in the sequence ontology: %type. ",
				-          array('%type' => $gff_target_type));
				-        $t_type_id = '';
				-      }
				-    }
				-  }
				-
				-  // None of the following values are available inside this function. They
				-  // used to be passed as undefined variables, which PHP evaluates to NULL
				-  // while raising a notice; pass NULL explicitly instead.
				-  // NOTE(review): confirm that tripal_feature_load_gff3_featureloc() is meant
				-  // to receive NULL for these arguments when loading a Target alignment.
				-  $organism = NULL;
				-  $phase = NULL;
				-  $attr_fmin_partial = NULL;
				-  $attr_fmax_partial = NULL;
				-  $attr_residue_info = NULL;
				-
				-  // we want to add a featureloc record that uses the target feature as the srcfeature (landmark)
				-  // and the landmark as the feature.
				-  tripal_feature_load_gff3_featureloc($feature, $organism, $target_feature, $target_fmin,
				-    $target_fmax, $target_strand, $phase, $attr_fmin_partial, $attr_fmax_partial, $attr_residue_info,
				-    $attr_locgroup, $t_type_id, $t_organism_id, $create_target, TRUE);
				-}
			
--- a/tripal_chado/includes/loaders/tripal_chado.obo_loader.inc
+++ b/tripal_chado/includes/loaders/tripal_chado.obo_loader.inc
@@ -1,1456 +0,0 @@
 
				-<?php
			
 
				-/**
			
 
				- * @file
			
 
				- * Functions to aid in loading ontologies into the chado cv module
			
 
				- */
			
 
				-
			
 
				-/**
			
 
				- * @defgroup tripal_obo_loader Ontology Loader
			
 
				- * @ingroup tripal_chado
			
 
				- * @{
			
 
				- * Functions to aid in loading ontologies into the chado cv module
			
 
				- * @}
			
 
				- */
			
 
				-
			
 
				-/**
				- * Provides the form to load an already existing controlled
				- * vocabulary into Chado.
				- *
				- * The form has two parts: a fieldset for selecting (and optionally editing
				- * and importing) an OBO reference already stored in the tripal_cv_obo table,
				- * and a collapsed fieldset for adding a new OBO reference.
				- *
				- * @param $form
				- *   The form array
				- * @param $form_state
				- *   The form state array
				- *
				- * @return
				- *   The form array with new additions
				- *
				- * @ingroup tripal_obo_loader
				- */
				-function tripal_cv_obo_form($form, &$form_state) {
				-
				-  // get the list of saved OBO references for the user to choose from
				-  $sql = "SELECT * FROM {tripal_cv_obo} ORDER BY name";
				-  $results = db_query($sql);
				-
				-  $obos = array();
				-  $obos[] = 'Select a Vocabulary';
				-  foreach ($results as $obo) {
				-    $obos[$obo->obo_id] = $obo->name;
				-  }
				-
				-  $obo_id = '';
				-  if (array_key_exists('values', $form_state)) {
				-    $obo_id = array_key_exists('obo_id', $form_state['values']) ? $form_state['values']['obo_id'] : '';
				-  }
				-
				-
				-  $form['instructions']['info'] = array(
				-    '#type' => 'item',
				-    '#markup' => t('This page allows you to load vocabularies and ontologies
				-      that are in OBO format. Once loaded, the terms from these
				-      vocabularies can be used to create content.
				-      You may use the form below to either reload a vocabulary that is already
				-      loaded (as when new updates to that vocabulary are available) or load a new
				-      vocabulary.'),
				-  );
				-
				-  $form['obo_existing'] = array(
				-    '#type' => 'fieldset',
				-    '#title' => t('Use a Saved Ontology OBO Reference'),
				-    '#prefix' => '<span id="obo-existing-fieldset">',
				-    '#suffix' => '</span>'
				-  );
				-
				-  $form['obo_existing']['existing_instructions']= array(
				-    '#type' => 'item',
				-    '#markup' => t('The vocabularies listed in the select box below have been pre-populated
				-      upon installation of Tripal or have been previously loaded.  Select one to edit
				-      its settings or submit for loading.  You may reload any vocabulary that has
				-      already been loaded to retrieve any new updates.'),
				-  );
				-
				-  $form['obo_existing']['obo_id'] = array(
				-    '#title' => t('Ontology OBO File Reference'),
				-    '#type' => 'select',
				-    '#options' => $obos,
				-    '#ajax' => array(
				-      'callback' => 'tripal_cv_obo_form_ajax_callback',
				-      'wrapper' => 'obo-existing-fieldset',
				-    ),
				-    '#description' => t('Select a vocabulary to import.')
				-  );
				-
				-  // If the user has selected an OBO ID then get the form elements for
				-  // updating.
				-  if ($obo_id) {
				-    $uobo_name = '';
				-    $uobo_url = '';
				-    $uobo_file = '';
				-
				-    $vocab = db_select('tripal_cv_obo', 't')
				-      ->fields('t', array('name', 'path'))
				-      ->condition('obo_id', $obo_id)
				-      ->execute()
				-      ->fetchObject();
				-
				-    // fetchObject() returns FALSE when no record matches the submitted
				-    // obo_id; in that case leave the defaults empty.
				-    if ($vocab) {
				-      $uobo_name = $vocab->name;
				-      if (preg_match('/^http/', $vocab->path)) {
				-        $uobo_url = $vocab->path;
				-      }
				-      else {
				-        $uobo_file = trim($vocab->path);
				-        // A path may start with a {module_name} token that stands for the
				-        // path of that module.
				-        $matches = array();
				-        if (preg_match('/\{(.*?)\}/', $uobo_file, $matches)) {
				-          $modpath = drupal_get_path('module', $matches[1]);
				-          $uobo_file = preg_replace('/\{.*?\}/', $modpath, $uobo_file);
				-        }
				-      }
				-    }
				-    // We don't want the previous value to remain. We want the new default to
				-    // show up, so remove the input values
				-    unset($form_state['input']['uobo_name']);
				-    unset($form_state['input']['uobo_url']);
				-    unset($form_state['input']['uobo_file']);
				-
				-    $form['obo_existing']['uobo_name']= array(
				-      '#type'          => 'textfield',
				-      '#title'         => t('Vocabulary Name'),
				-      '#description'   => t('Please provide a name for this vocabulary.  After upload, this name will appear in the drop down
				-                           list above for use again later.'),
				-      '#default_value'  => $uobo_name,
				-    );
				-
				-    $form['obo_existing']['uobo_url']= array(
				-      '#type'          => 'textfield',
				-      '#title'         => t('Remote URL'),
				-      '#description'   => t('Please enter a URL for the online OBO file.  The file will be downloaded and parsed.
				-                           (e.g. http://www.obofoundry.org/ro/ro.obo)'),
				-      '#default_value' => $uobo_url,
				-    );
				-
				-    $form['obo_existing']['uobo_file']= array(
				-      '#type'          => 'textfield',
				-      '#title'         => t('Local File'),
				-      '#description'   => t('Please enter the file system path for an OBO
				-        definition file. If entering a path relative to
				-        the Drupal installation you may use a relative path that excludes the
				-        Drupal installation directory (e.g. sites/default/files/xyz.obo). Note
				-        that Drupal relative paths have no preceding slash.
				-        Otherwise, please provide the full path on the filesystem.  The path
				-        must be accessible to the web server on which this Drupal instance is running.'),
				-      '#default_value' => $uobo_file,
				-    );
				-    $form['obo_existing']['update_obo_details'] = array(
				-      '#type' => 'submit',
				-      '#value' => 'Update Ontology Details',
				-      '#name' => 'update_obo_details'
				-    );
				-    $form['obo_existing']['update_load_obo'] = array(
				-      '#type' => 'submit',
				-      '#value' => 'Import Vocabulary',
				-      '#name' => 'update_load_obo'
				-    );
				-  }
				-
				-  $form['obo_new'] = array(
				-    '#type' => 'fieldset',
				-    '#title' => t('Add a New Ontology OBO Reference'),
				-    '#collapsible' => TRUE,
				-    '#collapsed' => TRUE,
				-  );
				-
				-  // Static text is rendered from '#markup' (as in the other instruction
				-  // elements of this form); a bare '#value' is not displayed.
				-  $form['obo_new']['path_instructions']= array(
				-    '#type' => 'item',
				-    '#markup' => t('Provide the name and path for the OBO file.  If the vocabulary OBO file
				-                   is stored local to the server provide a file name. If the vocabulary is stored remotely,
				-                   provide a URL.  Only provide a URL or a local file, not both.'),
				-  );
				-
				-  $form['obo_new']['obo_name']= array(
				-    '#type'          => 'textfield',
				-    '#title'         => t('New Vocabulary Name'),
				-    '#description'   => t('Please provide a name for this vocabulary.  After upload, this name will appear in the drop down
				-                           list above for use again later.'),
				-  );
				-
				-  $form['obo_new']['obo_url']= array(
				-    '#type'          => 'textfield',
				-    '#title'         => t('Remote URL'),
				-    '#description'   => t('Please enter a URL for the online OBO file.  The file will be downloaded and parsed.
				-                           (e.g. http://www.obofoundry.org/ro/ro.obo)'),
				-  );
				-
				-  $form['obo_new']['obo_file']= array(
				-    '#type'  => 'textfield',
				-    '#title'  => t('Local File'),
				-    '#description' => t('Please enter the file system path for an OBO
				-        definition file. If entering a path relative to
				-        the Drupal installation you may use a relative path that excludes the
				-        Drupal installation directory (e.g. sites/default/files/xyz.obo). Note
				-        that Drupal relative paths have no preceding slash.
				-        Otherwise, please provide the full path on the filesystem.  The path
				-        must be accessible to the web server on which this Drupal instance is running.'),
				-  );
				-
				-  $form['obo_new']['add_new_obo'] = array(
				-    '#type'  => 'submit',
				-    '#value' => t('Add this vocabulary'),
				-    '#name' => 'add_new_obo',
				-  );
				-
				-  $form['#redirect'] = 'admin/tripal/tripal_chado/obo_loader';
				-
				-  return $form;
				-}
			
 
				-
			
 
				-/**
				- * Validates the OBO loader form.
				- *
				- * Depending on the button that was clicked, checks either the fields for
				- * updating a saved OBO reference (uobo_*) or the fields for adding a new one
				- * (obo_*).
				- *
				- * @param $form
				- *   The form array
				- * @param $form_state
				- *   The form state array
				- */
				-function tripal_cv_obo_form_validate($form, &$form_state) {
				-  $obo_id    = $form_state['values']['obo_id'];
				-  $obo_name  = trim($form_state['values']['obo_name']);
				-  $obo_url   = trim($form_state['values']['obo_url']);
				-  $obo_file  = trim($form_state['values']['obo_file']);
				-  $uobo_name  = array_key_exists('uobo_name', $form_state['values']) ? trim($form_state['values']['uobo_name']) : '';
				-  $uobo_url   = array_key_exists('uobo_url', $form_state['values']) ? trim($form_state['values']['uobo_url']) : '';
				-  $uobo_file  = array_key_exists('uobo_file', $form_state['values']) ? trim($form_state['values']['uobo_file']) : '';
				-
				-  $button = $form_state['clicked_button']['#name'];
				-
				-  // When updating, the name may stay the same but must not conflict with
				-  // another OBO.
				-  if ($button == 'update_obo_details' or $button == 'update_load_obo') {
				-    _tripal_cv_obo_form_validate_reference($uobo_name, $uobo_url, $uobo_file, 'u', $obo_id);
				-  }
				-  if ($button == 'add_new_obo') {
				-    _tripal_cv_obo_form_validate_reference($obo_name, $obo_url, $obo_file, '');
				-  }
				-}
				-
				-/**
				- * Validates the name, URL and local file of a single OBO reference.
				- *
				- * @param $name
				- *   The vocabulary name entered by the user.
				- * @param $url
				- *   The remote URL entered by the user (may be empty).
				- * @param $file
				- *   The local file path entered by the user (may be empty).
				- * @param $prefix
				- *   The prefix of the form element names to flag: 'u' for the update
				- *   fields (uobo_name, uobo_url, uobo_file) or '' for the new fields.
				- * @param $obo_id
				- *   The obo_id of the record being updated, which is allowed to keep its
				- *   own name. NULL when a new reference is being added.
				- */
				-function _tripal_cv_obo_form_validate_reference($name, $url, $file, $prefix, $obo_id = NULL) {
				-  // Look for an existing record with the same name.
				-  $vocab = db_select('tripal_cv_obo', 't')
				-    ->fields('t', array('obo_id', 'name', 'path'))
				-    ->condition('name', $name)
				-    ->execute()
				-    ->fetchObject();
				-  if ($vocab and ($obo_id === NULL or $vocab->obo_id != $obo_id)) {
				-    form_set_error($prefix . 'obo_name', 'The vocabulary name must be different from existing vocabularies');
				-  }
				-
				-  // Make sure the file exists, first as a path relative to the Drupal
				-  // installation and then as given. Only check when a file was entered:
				-  // with an empty path the relative check would test the Drupal root
				-  // directory itself.
				-  if ($file !== '') {
				-    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $file;
				-    if (!file_exists($dfile) and !file_exists($file)) {
				-      form_set_error($prefix . 'obo_file', 'The specified path does not exist or cannot be read.');
				-    }
				-  }
				-
				-  // Exactly one of URL or local file must be provided.
				-  if (!$url and !$file) {
				-    form_set_error($prefix . 'obo_url', 'Please provide a URL or a path for the vocabulary.');
				-  }
				-  if ($url and $file) {
				-    form_set_error($prefix . 'obo_url', 'Please provide only a URL or a path for the vocabulary, but not both.');
				-  }
				-}
			
 
				-
			
 
				-/**
				- * Submit handler for the OBO loader form.
				- *
				- * Acts on the button that was clicked: saves changes to an existing OBO
				- * reference, additionally submits a job to import it, or stores a new OBO
				- * reference in the tripal_cv_obo table.
				- *
				- * @param $form
				- *   The form array
				- * @param $form_state
				- *   The form state array
				- *
				- * @ingroup tripal_obo_loader
				- */
				-function tripal_cv_obo_form_submit($form, &$form_state) {
				-
				-  $values = $form_state['values'];
				-  $button = $form_state['clicked_button']['#name'];
				-
				-  $obo_id   = $values['obo_id'];
				-  $obo_name = trim($values['obo_name']);
				-  $obo_url  = trim($values['obo_url']);
				-  $obo_file = trim($values['obo_file']);
				-
				-  // The update fields are only part of the form once a saved reference has
				-  // been selected.
				-  $uobo_name = array_key_exists('uobo_name', $values) ? trim($values['uobo_name']) : '';
				-  $uobo_url  = array_key_exists('uobo_url', $values) ? trim($values['uobo_url']) : '';
				-  $uobo_file = array_key_exists('uobo_file', $values) ? trim($values['uobo_file']) : '';
				-
				-  // Both update buttons first save the edited details.
				-  if (in_array($button, array('update_obo_details', 'update_load_obo'))) {
				-    $record = array(
				-      'name' => $uobo_name,
				-      'path' => $uobo_url ? $uobo_url : $uobo_file,
				-    );
				-    $updated = db_update('tripal_cv_obo')
				-      ->fields($record)
				-      ->condition('obo_id', $obo_id)
				-      ->execute();
				-    if (!$updated) {
				-      drupal_set_message(t("The vocabulary %vocab could not be updated.", array('%vocab' => $uobo_name)), 'error');
				-    }
				-    else {
				-      drupal_set_message(t("The vocabulary %vocab has been updated.", array('%vocab' => $uobo_name)));
				-    }
				-  }
				-
				-  // The details were saved above; what remains for this button is to submit
				-  // the import job.
				-  if ($button == 'update_load_obo') {
				-    tripal_submit_obo_job(array('obo_id' => $obo_id));
				-  }
				-
				-  // Store a new OBO reference.
				-  if ($button == 'add_new_obo') {
				-    $record = array(
				-      'name' => $obo_name,
				-      'path' => $obo_url ? $obo_url : $obo_file,
				-    );
				-    $inserted = db_insert('tripal_cv_obo')
				-      ->fields($record)
				-      ->execute();
				-    if (!$inserted) {
				-      drupal_set_message(t("The vocabulary %vocab could not be added.", array('%vocab' => $obo_name)), 'error');
				-    }
				-    else {
				-      drupal_set_message(t("The vocabulary %vocab has been added.", array('%vocab' => $obo_name)));
				-    }
				-  }
				-}
			
 
				-
			
 
				-/**
				- * A wrapper function for importing the user specified OBO file into Chado by
				- * specifying the obo_id of the OBO. It requires that the file be in OBO v1.2
				- * compatible format.  This function is typically executed via the Tripal jobs
				- * management after a user submits a job via the Load Ontologies form.
				- *
				- * @param $obo_id
				- *   An obo_id from the tripal_cv_obo table that specifies which OBO file to import
				- * @param $jobid
				- *   The job_id of the job from the Tripal jobs management system.
				- *
				- * @ingroup tripal_obo_loader
				- */
				-function tripal_chado_load_obo_v1_2_id($obo_id, $jobid = NULL) {
				-
				-  // Get the OBO reference.
				-  $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = :obo_id";
				-  $obo = db_query($sql, array(':obo_id' => $obo_id))->fetchObject();
				-
				-  // fetchObject() returns FALSE when there is no such record.
				-  if (!$obo) {
				-    print "ERROR: could not find an OBO reference with obo_id: '$obo_id'\n";
				-    return;
				-  }
				-
				-  // Convert the module name to the real path if present
				-  if (preg_match("/\{(.*?)\}/", $obo->path, $matches)) {
				-    $module = $matches[1];
				-    $path = drupal_realpath(drupal_get_path('module', $module));
				-    $obo->path = preg_replace("/\{.*?\}/", $path, $obo->path);
				-  }
				-
				-  // if the reference is for a remote URL then run the URL processing function
				-  if (preg_match("/^(https|http|ftp):\/\//", $obo->path)) {
				-    tripal_chado_load_obo_v1_2_url($obo->name, $obo->path, $jobid, 0);
				-    return;
				-  }
				-
				-  // Otherwise the reference is for a local file. First check to see if the
				-  // file is located local to Drupal.
				-  $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
				-  if (file_exists($dfile)) {
				-    tripal_chado_load_obo_v1_2_file($obo->name, $dfile , $jobid, 0);
				-  }
				-  // if not local to Drupal, the file must be someplace else, just use
				-  // the full path provided
				-  elseif (file_exists($obo->path)) {
				-    tripal_chado_load_obo_v1_2_file($obo->name, $obo->path, $jobid, 0);
				-  }
				-  else {
				-    print "ERROR: could not find OBO file: '$obo->path'\n";
				-  }
				-}
			
 
				-
			
 
				-/**
				- * Imports an OBO file, given by its path on the file system, into Chado.
				- *
				- * Optionally records the OBO in the tripal_cv_obo table, loads the file with
				- * tripal_chado_load_obo_v1_2() and, if loading succeeds, updates the
				- * cvtermpath table for the vocabularies that were loaded.
				- *
				- * @param $obo_name
				- *   The name of the OBO (typically the ontology or controlled vocabulary name)
				- * @param $file
				- *   The path on the file system where the ontology can be found
				- * @param $jobid
				- *   The job_id of the job from the Tripal jobs management system.
				- * @param $is_new
				- *   Set to TRUE if this is a new ontology that does not yet exist in the
				- *   tripal_cv_obo table.  If TRUE the OBO will be added to the table.
				- *
				- * @ingroup tripal_obo_loader
				- */
				-function tripal_chado_load_obo_v1_2_file($obo_name, $file, $jobid = NULL, $is_new = TRUE) {
				-  // Filled in by the loader with the vocabularies it adds.
				-  $loaded_cvs = array();
				-
				-  if ($is_new) {
				-    tripal_insert_obo($obo_name, $file);
				-  }
				-
				-  if (!tripal_chado_load_obo_v1_2($file, $jobid, $loaded_cvs)) {
				-    return;
				-  }
				-
				-  // The load succeeded, so bring the cvtermpath table up to date.
				-  tripal_chado_load_update_cvtermpath($loaded_cvs, $jobid);
				-  print "\nDone\n";
				-}
			
 
				-
			
 
				-/**
				- * A wrapper function for importing the user specified OBO file into Chado by
				- * specifying the remote URL of the OBO. It requires that the file be in OBO v1.2
				- * compatible format.  This function is typically executed via the Tripal jobs
				- * management after a user submits a job via the Load Ontologies form.
				- *
				- * The remote file is downloaded to a temporary file, loaded from there and
				- * the temporary file is removed afterwards.
				- *
				- * @param $obo_name
				- *   The name of the OBO (typically the ontology or controlled vocabulary name)
				- * @param $url
				- *   The remote URL of the OBO file.
				- * @param $jobid
				- *   The job_id of the job from the Tripal jobs management system.
				- * @param $is_new
				- *   Set to TRUE if this is a new ontology that does not yet exist in the
				- *   tripal_cv_obo table.  If TRUE the OBO will be added to the table.
				- *
				- * @ingroup tripal_obo_loader
				- */
				-function tripal_chado_load_obo_v1_2_url($obo_name, $url, $jobid = NULL, $is_new = TRUE) {
				-
				-  $newcvs = array();
				-
				-  // first download the OBO
				-  $temp = tempnam(sys_get_temp_dir(), 'obo_');
				-  print "Downloading URL $url, saving to $temp\n";
				-
				-  $download_error = "Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? " .
				-    " if you are unable to download the file you may manually download the OBO file and use the web interface to " .
				-    " specify the location of the file on your server.";
				-
				-  // Open the remote file before the local one so that a failed connection
				-  // does not leave an open handle behind. tripal_cv_obo_quiterror() ends the
				-  // script, so the temporary file has to be removed before it is called.
				-  $url_fh = fopen($url, "r");
				-  if (!$url_fh) {
				-    if ($temp and file_exists($temp)) {
				-      unlink($temp);
				-    }
				-    tripal_cv_obo_quiterror($download_error);
				-  }
				-  $obo_fh = fopen($temp, "w");
				-  if (!$obo_fh) {
				-    fclose($url_fh);
				-    tripal_cv_obo_quiterror("Unable to open the temporary file $temp for writing.");
				-  }
				-
				-  // Copy the whole stream; this returns FALSE on failure rather than looping
				-  // on a stream that never reaches end-of-file.
				-  $copied = stream_copy_to_stream($url_fh, $obo_fh);
				-  fclose($url_fh);
				-  fclose($obo_fh);
				-  if ($copied === FALSE) {
				-    unlink($temp);
				-    tripal_cv_obo_quiterror($download_error);
				-  }
				-
				-  if ($is_new) {
				-    tripal_insert_obo($obo_name, $url);
				-  }
				-
				-  // second, parse the OBO
				-  $success = tripal_chado_load_obo_v1_2($temp, $jobid, $newcvs);
				-  if ($success) {
				-
				-    // update the cvtermpath table
				-    tripal_chado_load_update_cvtermpath($newcvs, $jobid);
				-    print "Done\n";
				-  }
				-  // now remove the temp file
				-  unlink($temp);
				-}
			
 
				-
			
 
				-/**
				- * Runs the cvtermpath update for each of the given vocabularies.
				- *
				- * Calls tripal_update_cvtermpath() once per vocabulary so that the
				- * cvtermpath table of Chado can be used for quick lookup of term
				- * relationships.
				- *
				- * @param $newcvs
				- *   An associative array of controlled vocabularies to update.  The key must be
				- *   the name of the vocabulary and the value the cv_id from the cv table of chado.
				- * @param $jobid
				- *   The job_id of the job from the Tripal jobs management system.
				- *
				- * @ingroup tripal_obo_loader
				- */
				-function tripal_chado_load_update_cvtermpath($newcvs, $jobid) {
				-
				-  print "\nUpdating cvtermpath table.  This may take a while...\n";
				-
				-  // Only the cv_id values are needed; the vocabulary names are not used.
				-  foreach ($newcvs as $cv_id) {
				-    tripal_update_cvtermpath($cv_id, $jobid);
				-  }
				-}
			
 
				-
			
 
				-/**
				- * Imports a given OBO file into Chado.  This function is usually called by
				- * one of three wrapper functions:  tripal_chado_load_obo_v1_2_id,
				- * tripal_chado_load_obo_v1_2_file or tripal_chado_load_obo_v1_2_url. But, it can
				- * be called directly if the full path to an OBO file is available on the
				- * file system.
				- *
				- * @param $file
				- *   The full path to the OBO file on the file system
				- * @param $jobid
				- *   The job_id of the job from the Tripal jobs management system.
				- * @param $newcvs
				- *   An empty array passed by reference that upon return will contain the list
				- *   of newly added vocabularies.  The key will contain the CV name and the
				- *   value the new cv_id
				- *
				- * @return
				- *   TRUE if the file was loaded, FALSE if an exception was caught.
				- *
				- * @ingroup tripal_obo_loader
				- */
				-function tripal_chado_load_obo_v1_2($file, $jobid = NULL, &$newcvs = array()) {
				-  // $newcvs has a default only so that no required parameter follows the
				-  // optional $jobid, which PHP 8 deprecates; callers still pass their array.
				-
				-  // NOTE(review): the transaction (db_transaction() / rollback()) is disabled
				-  // in this function, yet the messages printed below still say that changes
				-  // are rolled back on failure. Confirm which of the two is intended.
				-
				-  print "\nNOTE: Loading of this OBO file is performed using a database transaction. \n" .
				-      "If the load fails or is terminated prematurely then the entire set of \n" .
				-      "insertions/updates is rolled back and will not be found in the database\n\n";
				-  try {
				-    $header = array();
				-
				-    // empty the temp table
				-    $sql = "DELETE FROM {tripal_obo_temp}";
				-    chado_query($sql);
				-
				-    print "Step 1: Preloading File $file\n";
				-
				-    // parse the obo file
				-    $default_db = tripal_cv_obo_parse($file, $header, $jobid);
				-
				-    // add the CV for this ontology to the database.  The v1.2 definition
				-    // specifies a 'default-namespace' to be used if a 'namespace' is not
				-    // present for each stanza.  Some ontologies have adopted the v1.4 method
				-    // in their v1.2 files and not including it.
				-    if (array_key_exists('default-namespace', $header)) {
				-      $namespace = $header['default-namespace'][0];
				-      $defaultcv = tripal_insert_cv($namespace, '');
				-      if (!$defaultcv) {
				-        tripal_cv_obo_quiterror('Cannot add namespace ' . $namespace);
				-      }
				-      $newcvs[$namespace] = $defaultcv->cv_id;
				-    }
				-    // if the 'default-namespace' is missing
				-    else {
				-
				-      // look to see if an 'ontology' key is present.  It is part of the v1.4
				-      // specification so it shouldn't be in the file, but just in case
				-      if (array_key_exists('ontology', $header)) {
				-        $namespace = strtoupper($header['ontology'][0]);
				-        $defaultcv = tripal_insert_cv($namespace, '');
				-        if (!$defaultcv) {
				-          tripal_cv_obo_quiterror('Cannot add namespace ' . $namespace);
				-        }
				-        $newcvs[$namespace] = $defaultcv->cv_id;
				-      }
				-      else {
				-        tripal_cv_obo_quiterror("Could not find a namespace for this OBO file.");
				-      }
				-      watchdog('t_obo_loader', "This OBO is missing the 'default-namespace' header. It is not possible to determine which vocabulary terms without a 'namespace' key should go.  Instead, those terms will be placed in the '%vocab' vocabulary.",
				-        array('%vocab' => $defaultcv->name), WATCHDOG_WARNING);
				-    }
				-
				-    // add any typedefs to the vocabulary first
				-    print "\nStep 2: Loading type defs...\n";
				-    tripal_cv_obo_load_typedefs($defaultcv, $newcvs, $default_db, $jobid);
				-
				-    // next add terms to the vocabulary
				-    print "\nStep 3: Loading terms...\n";
				-    if (!tripal_cv_obo_process_terms($defaultcv, $jobid, $newcvs, $default_db)) {
				-      tripal_cv_obo_quiterror('Cannot add terms from this ontology');
				-    }
				-  }
				-  catch (Exception $e) {
				-    print "\n"; // make sure we start errors on new line
				-    print "FAILED. Rolling back database changes...\n";
				-    watchdog_exception('T_obo_loader', $e);
				-    return FALSE;
				-  }
				-
				-  return TRUE;
				-}
			
 
				-
			
 
				-/**
			
 
				- * Immediately terminates loading of the OBO file.
			
 
				- *
			
 
				- * @param $message
			
 
				- *   The error message to present to the user
			
 
				- *
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_quiterror($message) {
			
 
				-
			
 
				-  tripal_report_error("T_obo_loader", TRIPAL_ERROR, $message, array());
			
 
				-  exit;
			
 
				-
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * OBO files are divided into a typedefs terms section and vocabulary terms section.
			
 
				- * This function loads the typedef terms from the OBO.
			
 
				- *
			
 
				- * @param $defaultcv
			
 
				- *   A database object containing a record from the cv table for the
			
 
				- *   default controlled vocabulary
			
 
				- * @param $newcvs
			
 
				- *   An associative array of controlled vocabularies for this OBO.  The key must be
			
 
				- *   the name of the vocabulary and the value the cv_id from the cv table of chado.
			
 
				- * @param $default_db
			
 
				- *   The name of the default database.
			
 
				- * @param $jobid
			
 
				- *   The job_id of the job from the Tripal jobs management system.
			
 
				- *
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_load_typedefs($defaultcv, $newcvs, $default_db, $jobid) {
			
 
				-  $sql = "SELECT * FROM {tripal_obo_temp} WHERE type = 'Typedef' ";
			
 
				-  $typedefs = chado_query($sql);
			
 
				-
			
 
				-  $sql = "
			
 
				-    SELECT count(*) as num_terms
			
 
				-    FROM {tripal_obo_temp}
			
 
				-    WHERE type = 'Typedef'
			
 
				-  ";
			
 
				-  $result = chado_query($sql)->fetchObject();
			
 
				-  $count = $result->num_terms;
			
 
				-
			
 
				-  // calculate the interval for updates
			
 
				-  $interval = intval($count * 0.0001);
			
 
				-  if ($interval < 1) {
			
 
				-    $interval = 1;
			
 
				-  }
			
 
				-  $i = 0;
			
 
				-  foreach ($typedefs as $typedef) {
			
 
				-    $term = unserialize(base64_decode($typedef->stanza));
			
 
				-
			
 
				-    // update the job status every interval
			
 
				-    if ($i % $interval == 0) {
			
 
				-      $complete = ($i / $count) * 33.33333333;
			
 
				-      if ($jobid) {
			
 
				-        tripal_set_job_progress($jobid, intval($complete + 33.33333333));
			
 
				-      }
			
 
				-      printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
			
 
				-    }
			
 
				-
			
 
				-    tripal_cv_obo_process_term($term, $defaultcv->name, 1, $newcvs, $default_db);
			
 
				-
			
 
				-    $i++;
			
 
				-  }
			
 
				-
			
 
				-  // Set the final status.
			
 
				-  if ($count > 0) {
			
 
				-    $complete = ($i / $count) * 33.33333333;
			
 
				-  }
			
 
				-  else {
			
 
				-    $complete = 33.33333333;
			
 
				-  }
			
 
				-  if ($jobid) {
			
 
				-    tripal_set_job_progress($jobid, intval($complete + 33.33333333));
			
 
				-  }
			
 
				-  printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
			
 
				-
			
 
				-  return 1;
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * OBO files are divided into a typedefs section and a terms section.  This
			
 
				- * function loads the typedef terms from the OBO.
			
 
				- *
			
 
				- * @param $defaultcv
			
 
				- *   A database object containing a record from the cv table for the
			
 
				- *   default controlled vocabulary
			
 
				- * @param $jobid
			
 
				- *  The job_id of the job from the Tripal jobs management system.
			
 
				- * @param $newcvs
			
 
				- *   An associative array of controlled vocabularies for this OBO.  The key must be
			
 
				- *   the name of the vocabulary and the value the cv_id from the cv table of chado.
			
 
				- * @param $default_db
			
 
				- *   The name of the default database.
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_process_terms($defaultcv, $jobid = NULL, &$newcvs, $default_db) {
			
 
				-
			
 
				-  $i = 0;
			
 
				-
			
 
				-  // iterate through each term from the OBO file and add it
			
 
				-  $sql = "
			
 
				-    SELECT * FROM {tripal_obo_temp}
			
 
				-    WHERE type = 'Term'
			
 
				-    ORDER BY id
			
 
				-  ";
			
 
				-  $terms = chado_query($sql);
			
 
				-
			
 
				-  $sql = "
			
 
				-    SELECT count(*) as num_terms
			
 
				-    FROM {tripal_obo_temp}
			
 
				-    WHERE type = 'Term'
			
 
				-  ";
			
 
				-  $result = chado_query($sql)->fetchObject();
			
 
				-  $count = $result->num_terms;
			
 
				-
			
 
				-  // calculate the interval for updates
			
 
				-  $interval = intval($count * 0.0001);
			
 
				-  if ($interval < 1) {
			
 
				-    $interval = 1;
			
 
				-  }
			
 
				-  foreach ($terms as $t) {
			
 
				-    $term = unserialize(base64_decode($t->stanza));
			
 
				-
			
 
				-    // update the job status every interval
			
 
				-    if ($i % $interval == 0) {
			
 
				-      $complete = ($i / $count) * 33.33333333;
			
 
				-      if ($jobid) {
			
 
				-        tripal_set_job_progress($jobid, intval($complete + 66.666666));
			
 
				-      }
			
 
				-      printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
			
 
				-    }
			
 
				-
			
 
				-    // add/update this term
			
 
				-    if (!tripal_cv_obo_process_term($term, $defaultcv->name, 0, $newcvs, $default_db)) {
			
 
				-      tripal_cv_obo_quiterror("Failed to process terms from the ontology");
			
 
				-    }
			
 
				-
			
 
				-    $i++;
			
 
				-  }
			
 
				-
			
 
				-  // set the final status
			
 
				-  if ($count > 0) {
			
 
				-    $complete = ($i / $count) * 33.33333333;
			
 
				-  }
			
 
				-  else {
			
 
				-    $complete = 33.33333333;
			
 
				-  }
			
 
				-  if ($jobid) {
			
 
				-    tripal_set_job_progress($jobid, intval($complete + 66.666666));
			
 
				-  }
			
 
				-  printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
			
 
				-
			
 
				-
			
 
				-  return 1;
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Uses the provided term array to add/update information to Chado about the
			
 
				- * term including the term, dbxref, synonyms, properties, and relationships.
			
 
				- *
			
 
				- * @param $term
			
 
				- *   An array representing the cvterm.
			
 
				- * @param $defaultcv
			
 
				- *   The name of the default controlled vocabulary
			
 
				- * @is_relationship
			
 
				- *   Set to 1 if this term is a relationship term
			
 
				- * @default_db
			
 
				- *   The name of the default database.
			
 
				- *
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_process_term($term, $defaultcv, $is_relationship = 0, &$newcvs, $default_db) {
			
 
				-
			
 
				-  // make sure we have a namespace for this term
			
 
				-  if (!array_key_exists('namespace', $term) and !($defaultcv or $defaultcv == '')) {
			
 
				-    tripal_cv_obo_quiterror("Cannot add the term: no namespace defined. " . $term['id'][0]);
			
 
				-  }
			
 
				-
			
 
				-  // construct the term array for sending to the tripal_chado_add_cvterm function
			
 
				-  // for adding a new cvterm
			
 
				-  $t = array();
			
 
				-  $t['id'] = $term['id'][0];
			
 
				-  $t['name'] = $term['name'][0];
			
 
				-  if (array_key_exists('def', $term)) {
			
 
				-    $t['definition'] = $term['def'][0];
			
 
				-  }
			
 
				-  if (array_key_exists('subset', $term)) {
			
 
				-    $t['subset'] = $term['subset'][0];
			
 
				-  }
			
 
				-  if (array_key_exists('namespace', $term)) {
			
 
				-    $t['namespace'] = $term['namespace'][0];
			
 
				-  }
			
 
				-  if (array_key_exists('is_obsolete', $term)) {
			
 
				-    $t['is_obsolete'] = $term['is_obsolete'][0];
			
 
				-  }
			
 
				-
			
 
				-  $t['cv_name'] = $defaultcv;
			
 
				-  $t['is_relationship'] = $is_relationship;
			
 
				-  $t['db_name'] = $default_db;
			
 
				-
			
 
				-  // add the cvterm
			
 
				-  $cvterm = tripal_insert_cvterm($t, array('update_existing' => TRUE));
			
 
				-  if (!$cvterm) {
			
 
				-    tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
			
 
				-  }
			
 
				-
			
 
				-  if (array_key_exists('namespace', $term)) {
			
 
				-    $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
			
 
				-  }
			
 
				-
			
 
				-  // now handle other properites
			
 
				-  if (array_key_exists('is_anonymous', $term)) {
			
 
				-    //print "WARNING: unhandled tag: is_anonymous\n";
			
 
				-  }
			
 
				-  if (array_key_exists('alt_id', $term)) {
			
 
				-    foreach ($term['alt_id'] as $alt_id) {
			
 
				-      if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
			
 
				-        tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  if (array_key_exists('subset', $term)) {
			
 
				-    //print "WARNING: unhandled tag: subset\n";
			
 
				-  }
			
 
				-  // add synonyms for this cvterm
			
 
				-  if (array_key_exists('synonym', $term)) {
			
 
				-    if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
			
 
				-      tripal_cv_obo_quiterror("Cannot add synonyms");
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
			
 
				-  // types to be of the v1.2 standard
			
 
				-  if (array_key_exists('exact_synonym', $term) or array_key_exists('narrow_synonym', $term) or array_key_exists('broad_synonym', $term)) {
			
 
				-    if (array_key_exists('exact_synonym', $term)) {
			
 
				-      foreach ($term['exact_synonym'] as $synonym) {
			
 
				-        $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 EXACT $2', $synonym);
			
 
				-        $term['synonym'][] = $new;
			
 
				-      }
			
 
				-    }
			
 
				-    if (array_key_exists('narrow_synonym', $term)) {
			
 
				-      foreach ($term['narrow_synonym'] as $synonym) {
			
 
				-        $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 NARROW $2', $synonym);
			
 
				-        $term['synonym'][] = $new;
			
 
				-      }
			
 
				-    }
			
 
				-    if (array_key_exists('broad_synonym', $term)) {
			
 
				-      foreach ($term['broad_synonym'] as $synonym) {
			
 
				-        $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 BROAD $2', $synonym);
			
 
				-        $term['synonym'][] = $new;
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
			
 
				-      tripal_cv_obo_quiterror("Cannot add/update synonyms");
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // add the comment to the cvtermprop table
			
 
				-  if (array_key_exists('comment', $term)) {
			
 
				-    $comments = $term['comment'];
			
 
				-    $j = 0;
			
 
				-    foreach ($comments as $comment) {
			
 
				-      if (!tripal_cv_obo_add_cvterm_prop($cvterm, 'comment', $comment, $j)) {
			
 
				-        tripal_cv_obo_quiterror("Cannot add/update cvterm property");
			
 
				-      }
			
 
				-      $j++;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // add any other external dbxrefs
			
 
				-  if (array_key_exists('xref', $term)) {
			
 
				-    foreach ($term['xref'] as $xref) {
			
 
				-      if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
			
 
				-        tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  if (array_key_exists('xref_analog', $term)) {
			
 
				-    foreach ($term['xref_analog'] as $xref) {
			
 
				-      if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
			
 
				-        tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  if (array_key_exists('xref_unk', $term)) {
			
 
				-    foreach ($term['xref_unk'] as $xref) {
			
 
				-      if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
			
 
				-        tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // add is_a relationships for this cvterm
			
 
				-  if (array_key_exists('is_a', $term)) {
			
 
				-    foreach ($term['is_a'] as $is_a) {
			
 
				-      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, 'is_a', $is_a, $is_relationship, $default_db)) {
			
 
				-        tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  if (array_key_exists('intersection_of', $term)) {
			
 
				-    //print "WARNING: unhandled tag: intersection_of\n";
			
 
				-  }
			
 
				-  if (array_key_exists('union_of', $term)) {
			
 
				-    //print "WARNING: unhandled tag: union_on\n";
			
 
				-  }
			
 
				-  if (array_key_exists('disjoint_from', $term)) {
			
 
				-    //print "WARNING: unhandled tag: disjoint_from\n";
			
 
				-  }
			
 
				-  if (array_key_exists('relationship', $term)) {
			
 
				-    foreach ($term['relationship'] as $value) {
			
 
				-      $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
			
 
				-      $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
			
 
				-      // The Gene Ontology uses 'has_part' for transitive relationships, but
			
 
				-      // it specifically indicates that 'has_part' should not be used for
			
 
				-      // grouping annotations.  Unfortunately, this means that when we
			
 
				-      // try to popoulate the cvtermpath table a 'has_part' relationships
			
 
				-      // will be used for exactly that purpose: to group annotations.  This
			
 
				-      // doesn't seem to the be the case for other vocabularies such as the
			
 
				-      // sequence ontology that uses has_part as primary relationship between
			
 
				-      // terms. So, when loading the GO, we'll not include has_part
			
 
				-      // relationships.
			
 
				-      /*if ($rel == 'has_part' and $cvterm->dbxref_id->db_id->name == 'GO') {
			
 
				-        continue;
			
 
				-      }*/
			
 
				-      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, $object, $is_relationship, $default_db)) {
			
 
				-        tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  if (array_key_exists('replaced_by', $term)) {
			
 
				-   //print "WARNING: unhandled tag: replaced_by\n";
			
 
				-  }
			
 
				-  if (array_key_exists('consider', $term)) {
			
 
				-    //print "WARNING: unhandled tag: consider\n";
			
 
				-  }
			
 
				-  if (array_key_exists('use_term', $term)) {
			
 
				-    //print "WARNING: unhandled tag: user_term\n";
			
 
				-  }
			
 
				-  if (array_key_exists('builtin', $term)) {
			
 
				-    //print "WARNING: unhandled tag: builtin\n";
			
 
				-  }
			
 
				-  return 1;
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Adds a cvterm relationship
			
 
				- *
			
 
				- * @param $cvterm
			
 
				- *   A database object for the cvterm
			
 
				- * @param $rel
			
 
				- *   The relationship name
			
 
				- * @param $objname
			
 
				- *   The relationship term name
			
 
				- * @param $defaultcv
			
 
				- *   A database object containing a record from the cv table for the
			
 
				- *   default controlled vocabulary
			
 
				- * @object_is_relationship
			
 
				- *   Set to 1 if this term is a relationship term
			
 
				- * @default_db
			
 
				- *   The name of the default database.
			
 
				- *
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel,
			
 
				-  $objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {
			
 
				-
			
 
				-  // make sure the relationship cvterm exists
			
 
				-  $term = array(
			
 
				-    'name' => $rel,
			
 
				-    'id' => "$default_db:$rel",
			
 
				-    'definition' => '',
			
 
				-    'is_obsolete' => 0,
			
 
				-    'cv_name' => $defaultcv,
			
 
				-    'is_relationship' => TRUE,
			
 
				-    'db_naame' => $default_db
			
 
				-  );
			
 
				-  $relcvterm = tripal_insert_cvterm($term, array('update_existing' => FALSE));
			
 
				-
			
 
				-  if (!$relcvterm) {
			
 
				-    // if the relationship term couldn't be found in the default_db provided
			
 
				-    // then do on more check to find it in the relationship ontology
			
 
				-    $term = array(
			
 
				-      'name' => $rel,
			
 
				-      'id' => "OBO_REL:$rel",
			
 
				-      'definition' => '',
			
 
				-      'is_obsolete' => 0,
			
 
				-      'cv_name' => $defaultcv,
			
 
				-      'is_relationship' => TRUE,
			
 
				-      'db_name' => 'OBO_REL'
			
 
				-    );
			
 
				-    $relcvterm = tripal_insert_cvterm($term, array('update_existing' => FALSE));
			
 
				-    if (!$relcvterm) {
			
 
				-      tripal_cv_obo_quiterror("Cannot find the relationship term in the current ontology or in the relationship ontology: $rel\n");
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // get the object term
			
 
				-  $oterm = tripal_cv_obo_get_term($objname);
			
 
				-  if (!$oterm) {
			
 
				-    tripal_cv_obo_quiterror("Could not find object term $objname\n");
			
 
				-  }
			
 
				-
			
 
				-  $objterm = array();
			
 
				-  $objterm['id']            = $oterm['id'][0];
			
 
				-  $objterm['name']          = $oterm['name'][0];
			
 
				-  if (array_key_exists('def', $oterm)) {
			
 
				-    $objterm['definition']           = $oterm['def'][0];
			
 
				-  }
			
 
				-  if (array_key_exists('subset', $oterm)) {
			
 
				-    $objterm['subset']      = $oterm['subset'][0];
			
 
				-  }
			
 
				-  if (array_key_exists('namespace', $oterm)) {
			
 
				-    $objterm['namespace']   = $oterm['namespace'][0];
			
 
				-  }
			
 
				-  if (array_key_exists('is_obsolete', $oterm)) {
			
 
				-    $objterm['is_obsolete'] = $oterm['is_obsolete'][0];
			
 
				-  }
			
 
				-
			
 
				-  $objterm['cv_name' ] = $defaultcv;
			
 
				-  $objterm['is_relationship'] = $object_is_relationship;
			
 
				-  $objterm['db_name'] = $default_db;
			
 
				-
			
 
				-  $objcvterm = tripal_insert_cvterm($objterm, array('update_existing' => TRUE));
			
 
				-  if (!$objcvterm) {
			
 
				-    tripal_cv_obo_quiterror("Cannot add cvterm " . $oterm['name'][0]);
			
 
				-  }
			
 
				-
			
 
				-  // check to see if the cvterm_relationship already exists, if not add it
			
 
				-  $values = array(
			
 
				-    'type_id'    => $relcvterm->cvterm_id,
			
 
				-    'subject_id' => $cvterm->cvterm_id,
			
 
				-    'object_id'  => $objcvterm->cvterm_id
			
 
				-  );
			
 
				-  $result = chado_select_record('cvterm_relationship', array('*'), $values);
			
 
				-  if (count($result) == 0) {
			
 
				-    $options = array('return_record' => FALSE);
			
 
				-    $success = chado_insert_record('cvterm_relationship', $values, $options);
			
 
				-    if (!$success) {
			
 
				-      tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  return TRUE;
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Retreives the term array from the temp loading table for a given term id.
			
 
				- *
			
 
				- * @param id
			
 
				- *   The id of the term to retrieve
			
 
				- *
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_get_term($id) {
			
 
				-  $values = array('id' => $id);
			
 
				-  $result = chado_select_record('tripal_obo_temp', array('stanza'), $values);
			
 
				-  if (count($result) == 0) {
			
 
				-    return FALSE;
			
 
				-  }
			
 
				-  return unserialize(base64_decode($result[0]->stanza));
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Adds the synonyms to a term
			
 
				- *
			
 
				- * @param term
			
 
				- *   An array representing the cvterm. It must have a 'synonym' key/value pair.
			
 
				- * @param cvterm
			
 
				- *   The database object of the cvterm to which the synonym will be added.
			
 
				- *
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_add_synonyms($term, $cvterm) {
			
 
				-
			
 
				-  // make sure we have a 'synonym_type' vocabulary
			
 
				-  $syncv = tripal_insert_cv(
			
 
				-    'synonym_type',
			
 
				-    'A local vocabulary added for synonym types.'
			
 
				-  );
			
 
				-
			
 
				-  // now add the synonyms
			
 
				-  if (array_key_exists('synonym', $term)) {
			
 
				-    foreach ($term['synonym'] as $synonym) {
			
 
				-
			
 
				-      // separate out the synonym definition and the synonym type
			
 
				-      $def = preg_replace('/^\s*"(.*)"\s*.*$/', '\1', $synonym);
			
 
				-      // the scope will be 'EXACT', etc...
			
 
				-      $scope = drupal_strtolower(preg_replace('/^.*"\s+(.*?)\s+.*$/', '\1', $synonym));
			
 
				-      if (!$scope) {  // if no scope then default to 'exact'
			
 
				-        $scope = 'exact';
			
 
				-      }
			
 
				-
			
 
				-      // make sure the synonym type exists in the 'synonym_type' vocabulary
			
 
				-      $values = array(
			
 
				-        'name' => $scope,
			
 
				-        'cv_id' => array(
			
 
				-          'name' => 'synonym_type',
			
 
				-        ),
			
 
				-      );
			
 
				-      $syntype = tripal_get_cvterm($values);
			
 
				-
			
 
				-      // if it doesn't exist then add it
			
 
				-      if (!$syntype) {
			
 
				-        // build a 'term' object so we can add the missing term
			
 
				-        $term = array(
			
 
				-           'name' => $scope,
			
 
				-           'id' => "synonym_type:$scope",
			
 
				-           'definition' => '',
			
 
				-           'is_obsolete' => 0,
			
 
				-           'cv_name' => $syncv->name,
			
 
				-           'is_relationship' => FALSE
			
 
				-        );
			
 
				-        $syntype = tripal_insert_cvterm($term, array('update_existing' => TRUE));
			
 
				-        if (!$syntype) {
			
 
				-          tripal_cv_obo_quiterror("Cannot add synonym type: internal:$scope");
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      // make sure the synonym doesn't already exists
			
 
				-      $values = array(
			
 
				-        'cvterm_id' => $cvterm->cvterm_id,
			
 
				-        'synonym' => $def
			
 
				-      );
			
 
				-      $results = chado_select_record('cvtermsynonym', array('*'), $values);
			
 
				-      if (count($results) == 0) {
			
 
				-        $values = array(
			
 
				-          'cvterm_id' => $cvterm->cvterm_id,
			
 
				-          'synonym' => $def,
			
 
				-          'type_id' => $syntype->cvterm_id
			
 
				-        );
			
 
				-        $options = array('return_record' => FALSE);
			
 
				-        $success = chado_insert_record('cvtermsynonym', $values, $options);
			
 
				-        if (!$success) {
			
 
				-          tripal_cv_obo_quiterror("Failed to insert the synonym for term: $name ($def)");
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      // now add the dbxrefs for the synonym if we have a comma in the middle
			
 
				-      // of a description then this will cause problems when splitting os lets
			
 
				-      // just change it so it won't mess up our splitting and then set it back
			
 
				-      // later.
			
 
				-      /**
			
 
				-      $synonym = preg_replace('/(".*?),\s(.*?")/','$1,_$2',$synonym);
			
 
				-      $dbxrefs = preg_split("/, /",preg_replace('/^.*\[(.*?)\]$/','\1',$synonym));
			
 
				-      foreach ($dbxrefs as $dbxref) {
			
 
				-       $dbxref = preg_replace('/,_/',", ",$dbxref);
			
 
				-        if ($dbxref) {
			
 
				-          tripal_cv_obo_add_cvterm_dbxref($syn,$dbxref);
			
 
				-        }
			
 
				-      }
			
 
				-      */
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  return TRUE;
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Parse the OBO file and populate the templ loading table
			
 
				- *
			
 
				- * @param $file
			
 
				- *   The path on the file system where the ontology can be found
			
 
				- * @param $header
			
 
				- *   An array passed by reference that will be populated with the header
			
 
				- *   information from the OBO file
			
 
				- * @param $jobid
			
 
				- *   The job_id of the job from the Tripal jobs management system.
			
 
				- *
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
			
 
				-  $in_header = 1;
			
 
				-  $stanza = array();
			
 
				-  $default_db = '';
			
 
				-  $line_num = 0;
			
 
				-  $num_read = 0;
			
 
				-  $intv_read = 0;
			
 
				-
			
 
				-  $filesize = filesize($obo_file);
			
 
				-  $interval = intval($filesize * 0.01);
			
 
				-  if ($interval < 1) {
			
 
				-    $interval = 1;
			
 
				-  }
			
 
				-
			
 
				-  // iterate through the lines in the OBO file and parse the stanzas
			
 
				-  $fh = fopen($obo_file, 'r');
			
 
				-  while ($line = fgets($fh)) {
			
 
				-
			
 
				-    $line_num++;
			
 
				-    $size = drupal_strlen($line);
			
 
				-    $num_read += $size;
			
 
				-    $intv_read += $size;
			
 
				-    $line = trim($line);
			
 
				-
			
 
				-    // update the job status every 1% features
			
 
				-    if ($intv_read >= $interval) {
			
 
				-      $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
			
 
				-      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
			
 
				-      if ($jobid) {
			
 
				-        tripal_set_job_progress($jobid, intval(($num_read / $filesize) * 33.33333333));
			
 
				-      }
			
 
				-      $intv_read = 0;
			
 
				-    }
			
 
				-
			
 
				-    // remove newlines
			
 
				-    $line = rtrim($line);
			
 
				-
			
 
				-    // remove any special characters that may be hiding
			
 
				-    $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);
			
 
				-
			
 
				-    // skip empty lines
			
 
				-    if (strcmp($line, '') == 0) {
			
 
				-      continue;
			
 
				-    }
			
 
				-
			
 
				-    //remove comments from end of lines
			
 
				-    $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);  // TODO: if the explamation is escaped
			
 
				-
			
 
				-    // at the first stanza we're out of header
			
 
				-    if (preg_match('/^\s*\[/', $line)) {
			
 
				-      $in_header = 0;
			
 
				-
			
 
				-      // store the stanza we just finished reading
			
 
				-      if (sizeof($stanza) > 0) {
			
 
				-        // add the term to the temp table
			
 
				-        $values = array(
			
 
				-          'id' => $stanza['id'][0],
			
 
				-          'stanza' => base64_encode(serialize($stanza)),
			
 
				-          'type' => $type,
			
 
				-        );
			
 
				-        $success = chado_insert_record('tripal_obo_temp', $values);
			
 
				-        if (!$success) {
			
 
				-          tripal_report_error('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), 'error');
			
 
				-          exit;
			
 
				-        }
			
 
				-
			
 
				-      }
			
 
				-      // get the stanza type:  Term, Typedef or Instance
			
 
				-      $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);
			
 
				-
			
 
				-      // start fresh with a new array
			
 
				-      $stanza = array();
			
 
				-      continue;
			
 
				-    }
			
 
				-    // break apart the line into the tag and value but ignore any escaped colons
			
 
				-    preg_replace("/\\:/", "|-|-|", $line); // temporarily replace escaped colons
			
 
				-    $pair = explode(":", $line, 2);
			
 
				-    $tag = $pair[0];
			
 
				-    $value = ltrim(rtrim($pair[1]));// remove surrounding spaces
			
 
				-
			
 
				-    // if this is the ID then look for the default DB
			
 
				-    $matches = array();
			
 
				-    if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
			
 
				-       $default_db = $matches[1];
			
 
				-    }
			
 
				-
			
 
				-    $tag = preg_replace("/\|-\|-\|/", "\:", $tag); // return the escaped colon
			
 
				-    $value = preg_replace("/\|-\|-\|/", "\:", $value);
			
 
				-    if ($in_header) {
			
 
				-      if (!array_key_exists($tag, $header)) {
			
 
				-        $header[$tag] = array();
			
 
				-      }
			
 
				-      $header[$tag][] = $value;
			
 
				-    }
			
 
				-    else {
			
 
				-      if (!array_key_exists($tag, $stanza)) {
			
 
				-        $stanza[$tag] = array();
			
 
				-      }
			
 
				-      $stanza[$tag][] = $value;
			
 
				-    }
			
 
				-  }
			
 
				-  // now add the last term in the file
			
 
				-  if (sizeof($stanza) > 0) {
			
 
				-    $values = array(
			
 
				-      'id' => $stanza['id'][0],
			
 
				-      'stanza' => base64_encode(serialize($stanza)),
			
 
				-      'type' => $type,
			
 
				-    );
			
 
				-    chado_insert_record('tripal_obo_temp', $values);
			
 
				-    if (!$success) {
			
 
				-      tripal_report_error('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), 'error');
			
 
				-      exit;
			
 
				-    }
			
 
				-    $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
			
 
				-    print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
			
 
				-    if ($jobid) {
			
 
				-      tripal_set_job_progress($jobid, intval(($num_read / $filesize) * 33.33333333));
			
 
				-    }
			
 
				-  }
			
 
				-  return $default_db;
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Adds a database reference to a cvterm
			
 
				- *
			
 
				- * @param cvterm
			
 
				- *   The database object of the cvterm to which the synonym will be added.
			
 
				- * @param xref
			
 
				- *   The cross refernce.  It should be of the form from the OBO specification
			
 
				- *
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {
			
 
				-
			
 
				-  $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
			
 
				-  $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);
			
 
				-  $description = preg_replace('/^.+?\"(.+?)\".*?$/', '$1', $xref);
			
 
				-  $dbxrefs = preg_replace('/^.+?\[(.+?)\].*?$/', '$1', $xref);
			
 
				-
			
 
				-  if (!$accession) {
			
 
				-    tripal_cv_obo_quiterror();
			
 
				-    tripal_report_error("T_obo_loader", TRIPAL_WARNING, "Cannot add a dbxref without an accession: '$xref'", NULL);
			
 
				-    return FALSE;
			
 
				-  }
			
 
				-
			
 
				-  // if the xref is a database link, handle that specially
			
 
				-  if (strcmp($dbname, 'http') == 0) {
			
 
				-    $accession = $xref;
			
 
				-    $dbname = 'URL';
			
 
				-  }
			
 
				-
			
 
				-  // add the database
			
 
				-  $db = tripal_insert_db(array('name' => $dbname));
			
 
				-  if (!$db) {
			
 
				-    tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
			
 
				-  }
			
 
				-
			
 
				-  // now add the dbxref
			
 
				-  $dbxref = tripal_cv_obo_add_dbxref($db->db_id, $accession, '', $description);
			
 
				-  if (!$dbxref) {
			
 
				-    tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
			
 
				-  }
			
 
				-
			
 
				-  // finally add the cvterm_dbxref but first check to make sure it exists
			
 
				-  $values = array(
			
 
				-    'cvterm_id' => $cvterm->cvterm_id,
			
 
				-    'dbxref_id' => $dbxref->dbxref_id,
			
 
				-  );
			
 
				-  $result = chado_select_record('cvterm_dbxref', array('*'), $values);
			
 
				-  if (count($result) == 0) {
			
 
				-    $ins_options = array('return_record' => FALSE);
			
 
				-    $result = chado_insert_record('cvterm_dbxref', $values, $ins_options);
			
 
				-    if (!$result) {
			
 
				-      tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
			
 
				-      return FALSE;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  return TRUE;
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Adds a property to a cvterm
			
 
				- *
			
 
				- * @param cvterm
			
 
				- *   A database object for the cvterm to which properties will be added
			
 
				- * @param $property
			
 
				- *   The name of the property to add
			
 
				- * @param $value
			
 
				- *   The value of the property
			
 
				- * @param rank
			
 
				- *   The rank of the property
			
 
				- *
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {
			
 
				-
			
 
				-  // make sure the 'cvterm_property_type' CV exists
			
 
				-  $cv = tripal_insert_cv('cvterm_property_type', '');
			
 
				-  if (!$cv) {
			
 
				-    tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
			
 
				-  }
			
 
				-
			
 
				-  // get the property type cvterm.  If it doesn't exist then we want to add it
			
 
				-  $values = array(
			
 
				-    'name' => $property,
			
 
				-    'cv_id' => $cv->cv_id,
			
 
				-  );
			
 
				-  $results = chado_select_record('cvterm', array('*'), $values);
			
 
				-  if (count($results) == 0) {
			
 
				-    $term = array(
			
 
				-      'name' => $property,
			
 
				-      'id' => "internal:$property",
			
 
				-      'definition' => '',
			
 
				-      'is_obsolete' => 0,
			
 
				-      'cv_name' => $cv->name,
			
 
				-      'is_relationship' => FALSE,
			
 
				-    );
			
 
				-    $cvproptype = tripal_insert_cvterm($term, array('update_existing' => FALSE));
			
 
				-    if (!$cvproptype) {
			
 
				-      tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
			
 
				-      return FALSE;
			
 
				-    }
			
 
				-  }
			
 
				-  else {
			
 
				-    $cvproptype = $results[0];
			
 
				-  }
			
 
				-
			
 
				-  // remove any properties that currently exist for this term.  We'll reset them
			
 
				-  if ($rank == 0) {
			
 
				-    $values = array('cvterm_id' => $cvterm->cvterm_id);
			
 
				-    $success = chado_delete_record('cvtermprop', $values);
			
 
				-    if (!$success) {
			
 
				-       tripal_cv_obo_quiterror("Could not remove existing properties to update property $property for term\n");
			
 
				-       return FALSE;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // now add the property
			
 
				-  $values = array(
			
 
				-    'cvterm_id' => $cvterm->cvterm_id,
			
 
				-    'type_id' => $cvproptype->cvterm_id,
			
 
				-    'value' => $value,
			
 
				-    'rank' => $rank,
			
 
				-  );
			
 
				-  $options = array('return_record' => FALSE);
			
 
				-  $result = chado_insert_record('cvtermprop', $values, $options);
			
 
				-  if (!$result) {
			
 
				-    tripal_cv_obo_quiterror("Could not add property $property for term\n");
			
 
				-    return FALSE;
			
 
				-  }
			
 
				-  return TRUE;
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * Adds a database cross reference to a cvterm
			
 
				- *
			
 
				- * @param db_id
			
 
				- *   The database ID of the cross reference
			
 
				- * @param accession
			
 
				- *   The cross reference's accession
			
 
				- * @param $version
			
 
				- *   The version of the dbxref
			
 
				- * @param $description
			
 
				- *   The description of the cross reference
			
 
				- *
			
 
				- * @ingroup tripal_obo_loader
			
 
				- */
			
 
				-function tripal_cv_obo_add_dbxref($db_id, $accession, $version='', $description='') {
			
 
				-
			
 
				-  // check to see if the dbxref exists if not, add it
			
 
				-  $values = array(
			
 
				-    'db_id' => $db_id,
			
 
				-    'accession' => $accession,
			
 
				-  );
			
 
				-  $result = chado_select_record('dbxref', array('dbxref_id'), $values);
			
 
				-  if (count($result) == 0) {
			
 
				-    $ins_values = array(
			
 
				-      'db_id'       => $db_id,
			
 
				-      'accession'   => $accession,
			
 
				-      'version'     => $version,
			
 
				-      'description' => $description,
			
 
				-    );
			
 
				-    $ins_options = array('return_record' => FALSE);
			
 
				-    $result = chado_insert_record('dbxref', $ins_values, $ins_options);
			
 
				-    if (!$result) {
			
 
				-      tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
			
 
				-      return FALSE;
			
 
				-    }
			
 
				-    $result = chado_select_record('dbxref', array('dbxref_id'), $values);
			
 
				-  }
			
 
				-  return $result[0];
			
 
				-}
			
 
				-
			
 
				-