Переглянути джерело

OBO loader is now functional.

spficklin 14 роки тому
батько
коміт
de147f903a
4 змінених файлів з 950 додано та 210 видалено
  1. 457 0
      tripal_cv/feature_property.obo
  2. 358 168
      tripal_cv/obo_loader.php
  3. 30 15
      tripal_cv/tripal_cv.install
  4. 105 27
      tripal_cv/tripal_cv.module

+ 457 - 0
tripal_cv/feature_property.obo

@@ -0,0 +1,457 @@
+format-version: 1.2
+date: 10:06:2005 15:07
+saved-by: cjm
+default-namespace: unknown
+autogenerated-by: /Users/cjm/cvs/go-dev/go-perl/scripts/go2fmt.pl
+default-namespace: feature_property
+remark: Initially generated by Chris Mungall, sourced from FB and Rice chado. TODO: TIGR Chado; TODO: GFF3
+subsetdef: fpo_sgd "SGD and related dbs subset"
+subsetdef: fpo_gff "GFF3 specific tags"
+subsetdef: fpo_apollo "properties with fixed semantics in apollo"
+
+[Typedef]
+id: SOFP:feature_property
+name: feature_property
+namespace: feature_property
+def: "A general purpose relation between a biological feature and some value" [so:cjm]
+
+[Typedef]
+id: SOFP:aminoacid
+name: aminoacid
+namespace: feature_property
+def: "amino acid coded for by a tRNA transcript feature" [so:cjm]
+is_a: SOFP:feature_property
+domain: SO:0000253
+range: CHEBI:22477
+
+[Typedef]
+id: SOFP:anticodon
+name: anticodon
+namespace: feature_property
+def: "anticodon coded for by a tRNA transcript feature" [so:cjm]
+is_a: SOFP:feature_property
+domain: SO:0000253
+range: xsd:string
+range_def: "A 3 character string representing the anticodon using the IUPAC DNA Sequence alphabet; for example ATG. RNA seqs MUST be converted to DNA seqs" []
+comments: Note the difference between GENBANK_SOFP:anticodon and SOFP:anticodon; the former includes the base range
+
+[Typedef]
+id: SOFP:citation
+name: citation
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+
+[Typedef]
+id: SOFP:comment
+name: comment
+namespace: feature_property
+def: "Annotation comments, from a human curator" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+
+[Typedef]
+id: SOFP:cyto_range
+name: cyto_range
+namespace: feature_property
+def: "The cytological range covered by a feature. May be auto-generated from sequence coordinates, or determined by experimental methods" [so:cjm]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "Must conform to naming standard for that species; typically fly-style (eg 41A2-B3) or normal (eg 14p28.1-q32.2)" [so:cjm]
+comments: If auto-derived, may be redundant with feature location
+
+[Typedef]
+id: SOFP:description
+name: description
+namespace: feature_property
+def: "Free-text description of feature" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comments: Often sourced from fasta header, in which case it includes everything after the > symbol
+
+[Typedef]
+id: SOFP:dicistronic
+name: dicistronic
+namespace: feature_property
+def: "true if transcript codes for >1 non-overlapping CDSs" []
+is_a: SOFP:feature_property
+domain: SO:0000115      ! transcript_feature
+range: xsd:boolean
+comments: redundant with secondary classification term SO:0000079
+
+[Typedef]
+id: SOFP:element
+name: element
+namespace: feature_property
+def: "name of transposable element class" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: in chado or gff, this may also be indicated by a reference to a class in an ontology of TE classes
+
+[Typedef]
+id: SOFP:encoded_symbol
+name: encoded_symbol
+namespace: feature_property
+def: "dicistronic CDS/proteins are attached to a single gene feature representing the whole cassette - in which case the symbol refers to the cassette. encoded_symbol refers to a gene in the sense of non-overlapping CDS set. cf Adh and Adhr in dmel" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+
+[Typedef]
+id: SOFP:evidenceGB
+name: evidenceGB
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "'experimental'"
+comment: same as GENBANK_SOFP:experimental??
+
+[Typedef]
+id: SOFP:linked_to
+name: linked_to
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: someone please define.. to do with restriction fragments?
+
+[Typedef]
+id: SOFP:missing_start_codon
+name: missing_start_codon
+namespace: feature_property
+def: "true if start of CDS is unknown" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:boolean
+comment: in chado, redundant with featureloc.is_{fmin,fmax}_partial
+
+[Typedef]
+id: SOFP:missing_stop_codon
+name: missing_stop_codon
+namespace: feature_property
+def: "true if end of CDS is unknown" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:boolean
+comment: in chado, redundant with featureloc.is_{fmin,fmax}_partial
+
+[Typedef]
+id: SOFP:na_change
+name: na_change
+namespace: feature_property
+def: "A nucleic acid modification in some variant feature relative to the reference sequence" []
+is_a: SOFP:feature_property
+domain: SO:0000109         ! sequence_variant
+range: xsd:string
+range_def: "<na_seq-REFERENCE><position><na_seq-VARIANT>"
+comment: in chado, this is redundant with featureloc.residue_info[rank=0,1]
+
+[Typedef]
+id: SOFP:non_canonical_start_codon
+name: non_canonical_start_codon
+namespace: feature_property
+def: "The sequence of the biological start codon" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "3-character DNA sequence, must be different from ATG" []
+
+[Typedef]
+id: SOFP:owner
+name: owner
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+
+[Typedef]
+id: SOFP:pr_change
+name: pr_change
+namespace: feature_property
+def: "An amino acid modification in some variant feature relative to the reference sequence" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "<aa_seq-ORIGINAL><position><aa_seq-NEW>. @ represents stop codon"
+comment: see also: na_change; in chado, this is redundant with featureloc.residue_info[rank=0,1]
+
+[Typedef]
+id: SOFP:problem
+name: problem
+namespace: feature_property
+def: "True if the annotation for this feature is problematic in some way; more details can be found in the comments property" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:boolean
+
+[Typedef]
+id: SOFP:readthrough_stop_codon
+name: readthrough_stop_codon
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:boolean
+
+[Typedef]
+id: SOFP:reported_na_change
+name: reported_na_change
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "As for na_change, suffixed with |<dbxref>" []
+
+[Typedef]
+id: SOFP:reported_pr_change
+name: reported_pr_change
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "As for pr_change, suffixed with |<dbxref>" []
+
+[Typedef]
+id: SOFP:sp_comment
+name: sp_comment
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: is this deprecated? originally sourced from gadfly pep validation pipeline - of little interest to others than flybase
+
+[Typedef]
+id: SOFP:sp_status
+name: sp_status
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: is this deprecated? originally sourced from gadfly pep validation pipeline - of little interest to others than flybase
+
+[Typedef]
+id: SOFP:status
+name: status
+namespace: feature_property
+def: "Annotation workflow status" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "'not done'"
+
+[Typedef]
+id: SOFP:source
+name: source
+namespace: feature_property
+def: "Source from which feature originates - may be a computer program or analysis, or a group. Corresponds to GFF column 2" []
+subset: fpo_gff
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+
+[Typedef]
+id: SOFP:symbol
+name: symbol
+namespace: feature_property
+def: "Community symbol" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+
+[Typedef]
+id: SOFP:validation_flag
+name: validation_flag
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "TODO: define the list of allowed values in an ontology" []
+
+[Typedef]
+id: SOFP:synonym
+name: synonym
+exact_synonym: "Alias" []
+namespace: feature_property
+def: "Historic community symbol, may have originally been symbol" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: In chado, this is redundant with the synonym table. GFF3 uses the tag "Alias"
+
+[Typedef]
+id: SOFP:date
+name: date
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:date
+comment: date of what?? Annotation??
+
+[Typedef]
+id: SOFP:internal_synonym
+name: internal_synonym
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: FB specific?
+
+[Typedef]
+id: SOFP:qseq_type
+name: qseq_type
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: deprecated?
+
+[Typedef]
+id: SOFP:unixdate
+name: unixdate
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:integer
+range_def: "No of seconds since unix time zero, 1 Jan 1970" []
+comment: date of what?? Annotation??
+
+[Typedef]
+id: SOFP:gbunit
+name: gbunit
+namespace: feature_property
+def: "Name of genbank scaffold sequence intersected by the feature" []
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "<dbxref> - taken from NCBI/EMBL/DDBJ" []
+
+[Typedef]
+id: SOFP:keywords
+name: keywords
+namespace: feature_property
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+
+[Typedef]
+id: SOFP:orf_classification
+name: orf_classification
+namespace: feature_property
+def: "gene call confidence flag"
+subset: fpo_sgd
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "'Dubious'|'Uncharacterized'|'Verified'" []
+comment: Currently only used by SGD
+
+[Typedef]
+id: SOFP:ontology_term
+name: ontology_term
+namespace: feature_property
+def: "A term from some ontology classifying the feature" []
+subset: fpo_gff
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+range_def: "<dbxref> of a term in some ontology" []
+comment: In chado, this is redundant with the feature_cvterm table
+
+[Typedef]
+id: SOFP:protein_id
+name: protein_id
+namespace: feature_property
+def: "A protein sequence identifier" [so:sjc]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: Should really be a dbxref [so:sjc]
+
+[Typedef]
+id: SOFP:organism
+name: organism
+namespace: feature_property
+def: "The name of the organism" [so:sjc]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: In chado, this is redundant with the organism table [so:sjc]
+
+[Typedef]
+id: SOFP:mol_type
+name: mol_type
+namespace: feature_property
+def: "The type of molecule" [so:sjc]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: Presumably, this could be inferred from the SO type [so:sjc]
+
+[Typedef]
+id: SOFP:dev_stage
+name: dev_stage
+namespace: feature_property
+def: "Stage of development" [so:sjc]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+
+[Typedef]
+id: SOFP:chromosome
+name: chromosome
+namespace: feature_property
+def: "Name of chromosome the feature occurs on(?)" [so:sjc]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+comment: What is this for? [so:sjc]
+
+[Typedef]
+id: SOFP:map
+name: map
+namespace: feature_property
+def: "A map location" [so:sjc]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+
+[Typedef]
+id: SOFP:finished
+name: finished
+namespace: feature_property
+def: "If the annotation of the feature is complete" [so:sjc]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:boolean
+
+[Typedef]
+id: SOFP:Note
+name: Note
+namespace: feature_property
+def: "A GFF3 Note attribute" [so:sjc]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+
+[Typedef]
+id: SOFP:Gap
+name: Gap
+namespace: feature_property
+def: "A GFF3 Gap cigar string" [so:sjc]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+
+[Typedef]
+id: SOFP:score
+name: score
+namespace: feature_property
+def: "A GFF3 score" [so:sjc]
+is_a: SOFP:feature_property
+domain: SO:0000110
+range: xsd:string
+

+ 358 - 168
tripal_cv/obo_loader.php

@@ -1,10 +1,100 @@
 <?php
 
+/*************************************************************************
+* Loads the OBO that is registered in the tripal_cv_obo table under $obo_id.
+*
+* The stored path decides which loader runs: paths starting with http:// or
+* ftp:// are handed to tripal_cv_load_obo_v1_2_url(); any other path is
+* handed to tripal_cv_load_obo_v1_2_file(), first resolved against the
+* Drupal installation directory and, if no such file exists, used as given.
+* Both loaders are called with $is_new = 0 because the reference is already
+* stored in the table.
+*
+* @param $obo_id  the obo_id of the tripal_cv_obo record to load
+* @param $jobid   optional job id that is passed through to the loaders
+*/
+function tripal_cv_load_obo_v1_2_id($obo_id,$jobid = NULL){
 
+   // get the OBO reference
+   $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
+   $obo = db_fetch_object(db_query($sql,$obo_id));
+   if(!$obo){
+      print "ERROR: Cannot find an OBO reference with the id $obo_id\n";
+      return;
+   }
+
+   // if the reference is for a remote URL then run the URL processing function.
+   // NOTE: the pattern needs its closing delimiter, otherwise preg_match()
+   // fails to compile it and never reports a match.
+   if(preg_match('#^(http|ftp)://#',$obo->path)){
+      tripal_cv_load_obo_v1_2_url($obo->name,$obo->path,$jobid,0);
+   } 
+   // if the reference is for a local file then run the file processing function
+   else {
+      // check to see if the file is located local to Drupal
+      $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path; 
+      if(file_exists($dfile)){
+         tripal_cv_load_obo_v1_2_file($obo->name,$dfile,$jobid,0);
+      } 
+      // if not local to Drupal, the file must be someplace else, just use
+      // the full path provided
+      else{
+         tripal_cv_load_obo_v1_2_file($obo->name,$obo->path,$jobid,0);
+      }
+   }  
+}
 /*************************************************************************
 *
 */
-function tripal_cv_load_obo_v1_2($file) {
+function tripal_cv_load_obo_v1_2_file($obo_name,$file,$jobid = NULL,$is_new = 1){
+   // Loads the OBO file at $file and afterwards updates the cvtermpath table
+   // for every vocabulary the loader reported back through $loaded_cvs.
+   // When $is_new is true the name/path pair is also stored with
+   // tripal_cv_load_obo_add_ref().
+   $loaded_cvs = array();
+   tripal_cv_load_obo_v1_2($file, $jobid, $loaded_cvs);
+
+   if ($is_new) {
+      tripal_cv_load_obo_add_ref($obo_name, $file);
+   }
+
+   // update the cvtermpath table for the vocabularies touched by the load
+   tripal_cv_load_update_cvtermpath($loaded_cvs, $jobid);
+
+   print "Ontology Sucessfully loaded!\n";
+}
+/*************************************************************************
+* Downloads an OBO file from a URL into a temporary file and loads it.
+*
+* The download is copied into a temporary file inside the tripal_cv module
+* directory, parsed with tripal_cv_load_obo_v1_2(), and the temporary file
+* is removed again.  When $is_new is true the name/URL pair is stored with
+* tripal_cv_load_obo_add_ref().  Finally the cvtermpath table is updated for
+* the vocabularies reported by the loader.
+*
+* If the temporary file cannot be created, the URL cannot be opened or the
+* download fails, the error is reported through tripal_cv_obo_quiterror(),
+* which ends the script.
+*
+* @param $obo_name  name under which the OBO is registered
+* @param $url       location of the OBO file
+* @param $jobid     optional job id passed through to the loader
+* @param $is_new    if true, a reference for this OBO is added
+*/
+function tripal_cv_load_obo_v1_2_url($obo_name,$url,$jobid = NULL,$is_new = 1){
+
+   $newcvs = array();
+
+   // first download the OBO
+   $temp = tempnam(tripal_get_moddir('tripal_cv'),'obo_');
+   if(!$temp){
+      tripal_cv_obo_quiterror("Cannot create a temporary file for the OBO download");
+   }
+   print "Opening URL $url\n";
+   $url_fh = fopen($url,"r");
+   if(!$url_fh){
+      // without this check feof() below would never see a valid handle
+      unlink($temp);
+      tripal_cv_obo_quiterror("Unable to open the URL $url");
+   }
+   $obo_fh = fopen($temp,"w");
+   if(!$obo_fh){
+      fclose($url_fh);
+      unlink($temp);
+      tripal_cv_obo_quiterror("Unable to write to the temporary file $temp");
+   }
+   while(!feof($url_fh)){
+      $chunk = fread($url_fh,255);
+      if($chunk === FALSE){
+         // a failed read would otherwise leave us with a truncated OBO
+         fclose($url_fh);
+         fclose($obo_fh);
+         unlink($temp);
+         tripal_cv_obo_quiterror("Failed while reading from the URL $url");
+      }
+      fwrite($obo_fh,$chunk);
+   }
+   fclose($url_fh);
+   fclose($obo_fh);
+
+   // second, parse the OBO
+   tripal_cv_load_obo_v1_2($temp,$jobid,$newcvs);
+
+   // now remove the temp file
+   unlink($temp);
+
+   if($is_new){
+      tripal_cv_load_obo_add_ref($obo_name,$url);
+   }
+
+   // update the cvtermpath table 
+   tripal_cv_load_update_cvtermpath($newcvs,$jobid);
+
+   print "Ontology Sucessfully loaded!\n";
+}
+/*************************************************************************
+* Calls tripal_cv_update_cvtermpath() once for every vocabulary in $newcvs.
+*
+* @param $newcvs  array whose values are cv ids (the keys are not used here)
+* @param $jobid   job id passed through to tripal_cv_update_cvtermpath()
+*/
+function tripal_cv_load_update_cvtermpath($newcvs,$jobid){
+
+   print "\nUpdating cvtermpath table.  This may take a while...\n";
+
+   // only the cv ids matter, the array keys are ignored
+   foreach ($newcvs as $cv_id) {
+      tripal_cv_update_cvtermpath($cv_id, $jobid);
+   }
+}
+/*************************************************************************
+* Stores a name/path pair for an OBO in the tripal_cv_obo table.
+*
+* @param $name  name of the OBO
+* @param $path  file path or URL of the OBO
+*/
+function tripal_cv_load_obo_add_ref($name,$path){
+   // the table name is wrapped in {} so Drupal applies the table prefix,
+   // matching the SELECT on the same table in tripal_cv_load_obo_v1_2_id()
+   $isql = "INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')";
+   db_query($isql,$name,$path);
+}
+/*************************************************************************
+*
+*/
+function tripal_cv_load_obo_v1_2($file,$jobid = NULL,&$newcvs) {
 
    $header = array();
    $obo = array();
@@ -16,137 +106,195 @@ function tripal_cv_load_obo_v1_2($file) {
 
    // make sure we have an 'internal' and a '_global' database
    if(!tripal_cv_obo_add_db('internal')){
-      return tripal_cv_obo_loader_done();
+      tripal_cv_obo_quiterror("Cannot add 'internal' database");
    }
    if(!tripal_cv_obo_add_db('_global')){
-      return tripal_cv_obo_loader_done();
+      tripal_cv_obo_quiterror("Cannot add '_global' database");
    }
 
    // parse the obo file
    tripal_cv_obo_parse($file,$obo,$header);
 
    // add the CV for this ontology to the database
-   $cv = tripal_cv_obo_add_cv($header['default-namespace'][0],'');
-   if(!$cv){
-      return tripal_cv_obo_loader_done();
+   $defaultcv = tripal_cv_obo_add_cv($header['default-namespace'][0],'');
+   if(!$defaultcv){
+      tripal_cv_obo_quiterror('Cannot add namespace ' . $header['default-namespace'][0]);
    }  
+   $newcvs[$header['default-namespace'][0]] = $defaultcv->cv_id;
 
    // add any typedefs to the vocabulary first
    $typedefs = $obo['Typedef'];
    foreach($typedefs as $typedef){
-      tripal_cv_obo_process_stanza($typedef,$cv,1);  
+      tripal_cv_obo_process_term($typedef,$defaultcv,$obo,1,$newcvs);  
    }
 
    // next add terms to the vocabulary
    $terms = $obo['Term'];
-   if(!tripal_cv_obo_process_stanza($terms,$cv,$obo)){
-      return tripal_cv_obo_loader_done();   
+   if(!tripal_cv_obo_process_terms($terms,$defaultcv,$obo,$jobid,$newcvs)){
+      tripal_cv_obo_quiterror('Cannot add terms from this ontology');
    }
    return tripal_cv_obo_loader_done();
 }
-
 /*************************************************************************
 *
 */
-function tripal_cv_obo_process_stanza($terms,$cv,$obo,$is_relationship=0){
+function tripal_cv_obo_quiterror ($message){
+   // Reports a fatal loader error: prints $message prefixed with "ERROR: ",
+   // sets the database search path back to public and ends the script, so
+   // this function never returns to its caller.
+   print "ERROR: " . $message . "\n";
+
+   db_query("set search_path to public");
+
+   exit();
+}
+/*************************************************************************
+* Finishes a loader run: sets the database search path back to public.
+*
+* @return always an empty string
+*/
+function tripal_cv_obo_loader_done (){
+   $reset_sql = "set search_path to public";
+   db_query($reset_sql);
+
+   return '';
+}
+/*************************************************************************
+* Processes every term of an OBO with tripal_cv_obo_process_term().
+*
+* When a job id is given the job progress is updated roughly every 1% of
+* the terms.  Progress is scaled to the range 0-50 because loading the terms
+* is only the first half of the job; the other half is updating the
+* cvtermpath table.
+*
+* @param $terms      array of term stanzas to process
+* @param $defaultcv  the default vocabulary, passed on for every term
+* @param $obo        the parsed OBO, passed on for every term
+* @param $jobid      optional job id used for progress updates
+* @param $newcvs     passed by reference to tripal_cv_obo_process_term()
+*
+* @return 1 when all terms were processed.  A term that cannot be processed
+*         is reported through tripal_cv_obo_quiterror(), which ends the script.
+*/
+function tripal_cv_obo_process_terms($terms,$defaultcv,$obo,$jobid=null,&$newcvs){
+
+   $i = 0;
+   $count = sizeof($terms);
+   // keep the interval at 1 or more: with fewer than 100 terms 1% rounds
+   // down to 0 and the modulo below would divide by zero
+   $interval = max(1,intval($count * 0.01));
 
    foreach ($terms as $term){
 
-      // add the cvterm
-      $cvterm = tripal_cv_obo_add_cv_term($term,$cv,$is_relationship,1);     
-      if(!$cvterm){ return 0; }
+      // update the job status every 1% terms
+      if($jobid and $i % $interval == 0){
+         // we multiply by 50 because parsing and loading cvterms is only the
+         // first half.  The other half is updating the cvtermpath table.
+         tripal_job_set_progress($jobid,intval(($i/$count)*50));
+      }
 
-      // now handle other properites
-      if(isset($term['is_anonymous'])){
-        print "WARNING: unhandled tag: is_anonymous\n";
+      if(!tripal_cv_obo_process_term($term,$defaultcv,$obo,0,$newcvs)){
+         tripal_cv_obo_quiterror("Failed to process terms from the ontology");
       }
-      if(isset($term['alt_id'])){
-         foreach($term['alt_id'] as $alt_id){
-            if(!tripal_cv_obo_add_cvterm_dbxref($cvterm,$alt_id)){
-               return 0;
-            }
+      $i++;
+   }
+   return 1;
+}
+/*************************************************************************
+* Adds a single OBO stanza (term or typedef) and its properties.
+*
+* The term itself is added with tripal_cv_obo_add_cv_term().  If the stanza
+* has a namespace, that namespace is recorded in $newcvs together with the
+* cv_id of the added cvterm.  After that the alternate ids, synonyms
+* (including the deprecated exact/narrow/broad_synonym tags, which are
+* rewritten to the v1.2 'synonym' form), comments, xrefs, is_a and other
+* relationships are added.  Tags that are not handled only print a warning.
+*
+* Any failure is reported through tripal_cv_obo_quiterror(), which ends the
+* script.
+*
+* @param $term             the stanza: an array of tag => array of values
+* @param $defaultcv        vocabulary passed on when adding terms
+* @param $obo              the parsed OBO, used to look up relationship objects
+* @param $is_relationship  passed on to tripal_cv_obo_add_cv_term() and
+*                          tripal_cv_obo_add_relationship()
+* @param $newcvs           by reference; receives namespace => cv_id
+*
+* @return always 1
+*/
+
+function tripal_cv_obo_process_term($term,$defaultcv,$obo,$is_relationship=0,&$newcvs){
+
+   // add the cvterm
+   $cvterm = tripal_cv_obo_add_cv_term($term,$defaultcv,$is_relationship,1);     
+   if(!$cvterm){ 
+      tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
+   }
+   // not every stanza carries a namespace tag, so check before reading it
+   if(isset($term['namespace'][0]) and $term['namespace'][0]){
+      $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
+   }
+
+   // now handle other properites
+   if(isset($term['is_anonymous'])){
+     print "WARNING: unhandled tag: is_anonymous\n";
+   }
+   if(isset($term['alt_id'])){
+      foreach($term['alt_id'] as $alt_id){
+         if(!tripal_cv_obo_add_cvterm_dbxref($cvterm,$alt_id)){
+            tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
          }
       }
-      if(isset($term['subset'])){
-        print "WARNING: unhandled tag: subset\n";
-      }
-      // add synonyms for this cvterm
-      if(isset($term['synonym'])){
-         if(!tripal_cv_obo_add_synonyms($term,$cvterm)){
-            return 0;
-         }
+   }
+   if(isset($term['subset'])){
+     print "WARNING: unhandled tag: subset\n";
+   }
+   // add synonyms for this cvterm
+   if(isset($term['synonym'])){
+      if(!tripal_cv_obo_add_synonyms($term,$cvterm)){
+         tripal_cv_obo_quiterror("Cannot add synonyms");
       }
+   }
+   // reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
+   // types to be of the v1.2 standard
+   if(isset($term['exact_synonym']) or isset($term['narrow_synonym']) or isset($term['broad_synonym'])){
       if(isset($term['exact_synonym'])){
-        // depricated
-        print "WARNING: unhandled tag: exact_synonym\n";
+         foreach ($term['exact_synonym'] as $synonym){
+
+            $new = preg_replace('/^\s*(\".+?\")(.*?)$/','$1 EXACT $2',$synonym);
+            $term['synonym'][] = $new;
+         }
       }
       if(isset($term['narrow_synonym'])){
-        print "WARNING: unhandled tag: narrow_synonym\n";
-        // depricated
+         foreach ($term['narrow_synonym'] as $synonym){
+            $new = preg_replace('/^\s*(\".+?\")(.*?)$/','$1 NARROW $2',$synonym);
+            $term['synonym'][] = $new;
+         }
       }
       if(isset($term['broad_synonym'])){
-        print "WARNING: unhandled tag: broad_synonym\n";
-        // depricated
+         foreach ($term['broad_synonym'] as $synonym){
+            $new = preg_replace('/^\s*(\".+?\")(.*?)$/','$1 BROAD $2',$synonym);
+            $term['synonym'][] = $new;
+         }
+      } 
+
+      if(!tripal_cv_obo_add_synonyms($term,$cvterm)){
+         tripal_cv_obo_quiterror("Cannot add/update synonyms");
       }
-      // add the comment to the cvtermprop table
-      if(isset($term['comment'])){
-         $comments = $term['comment'];
-         $j = 0;
-         foreach($comments as $comment){
-            if(!tripal_cv_obo_add_cvterm_prop($cvterm,'comment',$comment,$j)){
-               return 0;
-            }
-            $j++;
+   }
+   // add the comment to the cvtermprop table
+   if(isset($term['comment'])){
+      $comments = $term['comment'];
+      $j = 0;
+      foreach($comments as $comment){
+         if(!tripal_cv_obo_add_cvterm_prop($cvterm,'comment',$comment,$j)){
+            tripal_cv_obo_quiterror("Cannot add/update cvterm property");
          }
-      }    
-      // add any other external dbxrefs
-      if(isset($term['xref']) or isset($term['xref_analog']) or isset($term['xref_unk'])){
-         foreach($term['xref'] as $xref){
-            if(!tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref)){
-               return 0;
-            }
+         $j++;
+      }
+   }    
+   // add any other external dbxrefs.  Each of the three xref tags is checked
+   // on its own so that a stanza with only xref_analog or xref_unk values
+   // does not iterate over a missing 'xref' entry
+   foreach(array('xref','xref_analog','xref_unk') as $xref_tag){
+      if(!isset($term[$xref_tag])){ continue; }
+      foreach($term[$xref_tag] as $xref){
+         if(!tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref)){
+            tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
          }
       }
+   }
 
-      // add is_a relationships for this cvterm
-      if(isset($term['is_a'])){
-         foreach($term['is_a'] as $is_a){
-            if(!tripal_cv_obo_add_relationship($cvterm,$cv,$obo,'is_a',$is_a)){
-               return 0;
-            }
+   // add is_a relationships for this cvterm
+   if(isset($term['is_a'])){
+      foreach($term['is_a'] as $is_a){
+         if(!tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,'is_a',$is_a,$is_relationship)){
+            tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
          }
-      } 
-      if(isset($term['intersection_of'])){
-        print "WARNING: unhandled tag: intersection_of\n";
       }
-      if(isset($term['union_of'])){
-        print "WARNING: unhandled tag: union_on\n";
-      }
-      if(isset($term['disjoint_from'])){
-        print "WARNING: unhandled tag: disjoint_from\n";
-      }
-      if(isset($term['relationship'])){
-         foreach($term['relationship'] as $value){
-            $rel = preg_replace('/^(.+?)\s.+?$/','\1',$value);
-            $object = preg_replace('/^.+?\s(.+?)$/','\1',$value);
-            if(!tripal_cv_obo_add_relationship($cvterm,$cv,$obo,$rel,$object)){
-               return 0;
-            }
+   } 
+   if(isset($term['intersection_of'])){
+     print "WARNING: unhandled tag: intersection_of\n";
+   }
+   if(isset($term['union_of'])){
+     print "WARNING: unhandled tag: union_of\n";
+   }
+   if(isset($term['disjoint_from'])){
+     print "WARNING: unhandled tag: disjoint_from\n";
+   }
+   if(isset($term['relationship'])){
+      foreach($term['relationship'] as $value){
+         // the value is "<relationship> <object>": split on the first whitespace
+         $rel = preg_replace('/^(.+?)\s.+?$/','\1',$value);
+         $object = preg_replace('/^.+?\s(.+?)$/','\1',$value);
+         if(!tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,$rel,$object,$is_relationship)){
+            tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
          }
       }
-      if(isset($term['replaced_by'])){
-        print "WARNING: unhandled tag: replaced_by\n";
-      }
-      if(isset($term['consider'])){
-        print "WARNING: unhandled tag: consider\n";
-      }
-      if(isset($term['use_term'])){
-        print "WARNING: unhandled tag: user_term\n";
-      }
-      if(isset($term['builtin'])){
-        print "WARNING: unhandled tag: builtin\n";
-      }
-   }   
+   }
+   if(isset($term['replaced_by'])){
+     print "WARNING: unhandled tag: replaced_by\n";
+   }
+   if(isset($term['consider'])){
+     print "WARNING: unhandled tag: consider\n";
+   }
+   if(isset($term['use_term'])){
+     print "WARNING: unhandled tag: use_term\n";
+   }
+   if(isset($term['builtin'])){
+     print "WARNING: unhandled tag: builtin\n";
+   }
    return 1;
 }
 /*************************************************************************
@@ -158,8 +306,7 @@ function tripal_cv_obo_add_db($dbname){
    $db = db_fetch_object(db_query($db_sql,$dbname));
    if(!$db){
       if(!db_query("INSERT INTO {db} (name) VALUES ('%s')",$dbname)){
-         print "Cannot create '$dbname' db in Chado.";
-         return 0;
+         tripal_cv_obo_quiterror("Cannot create '$dbname' db in Chado.");
       }      
      $db = db_fetch_object(db_query($db_sql,$dbname));
    }
@@ -180,15 +327,13 @@ function tripal_cv_obo_add_cv($name,$comment){
    if(!$cv){
       $sql = "INSERT INTO {cv} (name,definition) VALUES ('%s','%s')";
       if(!db_query($sql,$vocab,$remark)){
-         print "Failed to create the CV record";
-         return 0;
+         tripal_cv_obo_quiterror("Failed to create the CV record");
       }
       $cv = db_fetch_object(db_query($cv_sql,$vocab));
    } else {
       $sql = "UPDATE {cv} SET definition = '%s' WHERE name ='%s'";
       if(!db_query($sql,$remark,$vocab)){
-         print "Failed to update the CV record";
-         return 0;
+         tripal_cv_obo_quiterror("Failed to update the CV record");
       }
       $cv = db_fetch_object(db_query($cv_sql,$vocab));
    }
@@ -201,7 +346,9 @@ function tripal_cv_obo_add_cvterm_prop($cvterm,$property,$value,$rank){
 
    // make sure the 'cvterm_property_type' CV exists
    $cv = tripal_cv_obo_add_cv($property,'');
-   if(!$cv){ return 0; }
+   if(!$cv){ 
+      tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
+   }
 
    // get the property type cvterm.  If it doesn't exist then we want to add it
    $sql = "
@@ -218,7 +365,9 @@ function tripal_cv_obo_add_cvterm_prop($cvterm,$property,$value,$rank){
          'is_obsolete' => array(0),
       );
       $cvproptype = tripal_cv_obo_add_cv_term($term,$cv,0,0);
-      if(!$cvproptype){  return 0; }      
+      if(!$cvproptype){  
+         tripal_cv_obo_quiterror("Cannot add/upste cvterm property: internal:$property");
+      }
    }
 
 
@@ -232,56 +381,45 @@ function tripal_cv_obo_add_cvterm_prop($cvterm,$property,$value,$rank){
    $sql = "INSERT INTO {cvtermprop} (cvterm_id,type_id,value,rank) ".
           "VALUES (%d, %d, '%s',%d)";
    if(!db_query($sql,$cvterm->cvterm_id,$cvproptype->cvterm_id,$value,$rank)){
-      print "Could not add property $property for term\n";
-      return 0;
+      tripal_cv_obo_quiterror("Could not add property $property for term\n");
    }
    return 1;
 }
 /*************************************************************************
 *
 */
-function tripal_cv_obo_add_relationship($cvterm,$cv,$obo,$rel,$objname){
+function tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,$rel,$objname,$object_is_relationship=0){
 
    // make sure the relationship cvterm exists
-   $sql = "
-        SELECT * 
-        FROM {cvterm} CVT INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
-        WHERE CVT.name = '%s' and CV.name = '%s'
-   ";
-   $cvisa = db_fetch_object(db_query($sql,$rel,$cv->name));
-   if(!$cvisa){
-      $term = array(
-         'name' => array($rel),
-         'id' => array($rel),
-         'definition' => array(''),
-         'is_obsolete' => array(0),
-      );
-      if(!tripal_cv_obo_add_cv_term($term,$cv,1,1)){
-         print "Cannot find or insert the relationship term: $rel.\n";
-         return 0;
-      }
-      $cvisa = db_fetch_object(db_query($sql,$rel,$cv->name));
+   $term = array(
+      'name' => array($rel),
+      'id' => array($rel),
+      'definition' => array(''),
+      'is_obsolete' => array(0),
+   );
+   $relcvterm = tripal_cv_obo_add_cv_term($term,$defaultcv,1,0);
+   if(!$relcvterm){
+      tripal_cv_obo_quiterror("Cannot find or insert the relationship term: $rel\n");
    }
 
    // get the object term
    $objterm = tripal_cv_obo_get_term($obo,$objname);
    if(!$objterm) { 
-      print "Could not find object term $objname\n";
-      return 0; 
+      tripal_cv_obo_quiterror("Could not find object term $objname\n"); 
+   }
+   $objcvterm = tripal_cv_obo_add_cv_term($objterm,$defaultcv,$object_is_relationship,1);
+   if(!$objcvterm){ 
+      tripal_cv_obo_quiterror("Cannot add/find cvterm");
    }
-   $objcvterm = tripal_cv_obo_add_cv_term($objterm,$cv,1,1);
-   if(!$objcvterm){ return 0; }
 
    // check to see if the cvterm_relationship already exists, if not add it
    $cvrsql = "SELECT * FROM {cvterm_relationship} WHERE type_id = %d and subject_id = %d and object_id = %d";
-   if(!db_fetch_object(db_query($cvrsql,$cvisa->cvterm_id,$cvterm->cvterm_id,$objcvterm->cvterm_id))){
+   if(!db_fetch_object(db_query($cvrsql,$relcvterm->cvterm_id,$cvterm->cvterm_id,$objcvterm->cvterm_id))){
       $sql = "INSERT INTO {cvterm_relationship} ".
              "(type_id,subject_id,object_id) VALUES (%d,%d,%d)";
-      if(!db_query($sql,$cvisa->cvterm_id,$cvterm->cvterm_id,$objcvterm->cvterm_id)){
-         print "Cannot add $rel relationship";
-         return 0;
+      if(!db_query($sql,$relcvterm->cvterm_id,$cvterm->cvterm_id,$objcvterm->cvterm_id)){
+         tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
       }
-//      print  "  $rel $objname\n";
    }
 
    return 1;
@@ -311,8 +449,7 @@ function tripal_cv_obo_add_synonyms($term,$cvterm){
    if(!$syncv){
       $sql = "INSERT INTO {cv} (name,definition) VALUES ('synonym_type','')";
       if(!db_query($sql)){
-         print "Failed to add the synonyms type vocabulary";
-         return 0;
+         tripal_cv_obo_quiterror("Failed to add the synonyms type vocabulary");
       }
    }
 
@@ -340,7 +477,7 @@ function tripal_cv_obo_add_synonyms($term,$cvterm){
                'is_obsolete' => array(0),
             );
             if(!tripal_cv_obo_add_cv_term($term,$syncv,0,1)){
-               return 0;
+               tripal_cv_obo_quiterror("Cannot add synonym type: internal:$type");
             }
             $syntype = db_fetch_object(db_query($cvtsql,$type,'synonym_type'));
          }       
@@ -356,10 +493,22 @@ function tripal_cv_obo_add_synonyms($term,$cvterm){
             $sql = "INSERT INTO {cvtermsynonym} (cvterm_id,synonym,type_id)
                     VALUES(%d,'%s',%d)";
             if(!db_query($sql,$cvterm->cvterm_id,$def,$syntype->cvterm_id)){
-               print "Failed to insert the synonym for term: $name ($def)\n";
-               return 0;
+               tripal_cv_obo_quiterror("Failed to insert the synonym for term: $name ($def)");
             }
-         }            
+         } 
+
+         // now add the dbxrefs for the synonym.  If we have a comma in the middle
+         // of a description then this will cause problems when splitting, so let's
+         // just change it so it won't mess up our splitting and then set it back
+         // later.
+//         $synonym = preg_replace('/(".*?),\s(.*?")/','$1,_$2',$synonym);
+//         $dbxrefs = preg_split("/, /",preg_replace('/^.*\[(.*?)\]$/','\1',$synonym));
+//         foreach($dbxrefs as $dbxref){
+//            $dbxref = preg_replace('/,_/',", ",$dbxref);
+//            if($dbxref){
+//               tripal_cv_obo_add_cvterm_dbxref($syn,$dbxref);
+//            }
+//         }
       } 
    }
    return 1;
@@ -367,45 +516,77 @@ function tripal_cv_obo_add_synonyms($term,$cvterm){
 /*************************************************************************
 *
 */
-function tripal_cv_obo_add_cv_term($term,$cv,$is_relationship = 0,$update = 1){
+function tripal_cv_obo_add_cv_term($term,$defaultcv,$is_relationship = 0,$update = 1){
 
    // get the term properties
    $id = $term['id'][0];
    $name = $term['name'][0];
+   $cvname = $term['namespace'][0];
    $definition = preg_replace('/^\"(.*)\"/','\1',$term['def'][0]);
    $is_obsolete = 0;
    if(isset($term['is_obsolete'][0]) and  strcmp($term['is_obsolete'][0],'true')==0){
      $is_obsolete = 1;
    }
+   if(!$cvname){
+      $cvname = $defaultcv->name;
+   }
+   // make sure the CV name exists
+   $cv = tripal_cv_obo_add_cv($cvname,'');
+   if(!$cv){
+      tripal_cv_obo_quiterror("Cannot find namespace '$cvname' when adding/updating $id");
+   }
+
+   // this SQL statement will be used a lot to find a cvterm so just set it
+   // here for easy reference below.
+   $cvtermsql = "SELECT CVT.name, CVT.cvterm_id, DB.name as dbname, DB.db_id 
+                  FROM {cvterm} CVT
+                    INNER JOIN {dbxref} DBX on CVT.dbxref_id = DBX.dbxref_id
+                    INNER JOIN {db} DB on DBX.db_id = DB.db_id
+                    INNER JOIN {cv} CV on CV.cv_id = CVT.cv_id
+                  WHERE CVT.name = '%s' and DB.name = '%s'";  
 
    // get the accession and the database from the cvterm
    if(preg_match('/^.+?:.*$/',$id)){
       $accession = preg_replace('/^.+?:(.*)$/','\1',$id);
       $dbname = preg_replace('/^(.+?):.*$/','\1',$id);
    } 
-   else if($is_relationship) {
+   if($is_relationship and !$dbname){
       $accession = $id;
-      $dbname = 'OBO_REL';
+      // because this is a relationship cvterm first check to see if it 
+      // exists in the relationship ontology. If it does then return the cvterm.
+      //  If not then set the dbname to _global and we'll add it or find it there
+      $cvterm = db_fetch_object(db_query($cvtermsql,$name,'OBO_REL'));
+      if($cvterm){
+         return $cvterm;
+      } else {
+         // next check if this term is in the _global ontology.  If it is then
+         // return it no matter what the original CV
+         $dbname = '_global';
+
+         $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
+         if($cvterm){
+            return $cvterm;
+         }
+      }
+   }
+   if(!$is_relationship and !$dbname){
+      tripal_cv_obo_quiterror("A database identifier is missing from the term: $id");
    }
 
-   // check to see if the database exists
+   // check to see if the database exists. 
    $db = tripal_cv_obo_add_db($dbname);
    if(!$db){
-      print "Cannot find database '$dbname' in Chado.\n";
-      return 0;
+      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }
 
-   // check to see if the cvterm already exists
-   $cvtermsql = "SELECT * from {cvterm} WHERE name = '%s' and cv_id = %d";
-   $cvterm = db_fetch_object(db_query($cvtermsql,$name,$cv->cv_id));
 
    // if the cvterm doesn't exist then add it otherwise just update it
+   $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
    if(!$cvterm){
       // check to see if the dbxref exists if not, add it
       $dbxref =  tripal_cv_obo_add_dbxref($db->db_id,$accession);
       if(!$dbxref){
-         print "Failed to find or insert the dbxref record for cvterm, $name (id: $accession), for database $dbname\n";
-         return 0;
+         tripal_cv_obo_quiterror("Failed to find or insert the dbxref record for cvterm, $name (id: $accession), for database $dbname");
       }
 
       // now add the cvterm
@@ -416,11 +597,18 @@ function tripal_cv_obo_add_cv_term($term,$cv,$is_relationship = 0,$update = 1){
       ";
       if(!db_query($sql,$cv->cv_id,$name,$definition,
           $dbxref->dbxref_id,$is_obsolete,$is_relationship)){
-         print "Failed to insert the term: $id\n";
-         return 0;
+         if(!$is_relationship){
+            tripal_cv_obo_quiterror("Failed to insert the term: $name ($dbname)");
+         } else {
+           tripal_cv_obo_quiterror("Failed to insert the relationship term: $name (cv: " . $cvname . " db: $dbname)");
+         }
       }     
-      print "Added CV term: $id\n";
-      $cvterm = db_fetch_object(db_query($cvtermsql,$name,$cv->cv_id));
+      $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
+      if(!$is_relationship){
+         print "Added CV term: $name ($dbname)\n";
+      } else {
+         print "Added relationship CV term: $name ($dbname)\n";
+      }
    }
    elseif($update) { // update the cvterm
       $sql = "
@@ -430,11 +618,14 @@ function tripal_cv_obo_add_cv_term($term,$cv,$is_relationship = 0,$update = 1){
       ";
       if(!db_query($sql,$term['name'][0],$definition,
           $is_obsolete,$is_relationship,$cvterm->cvterm_id)){
-         print "Failed to update the term: $name\n";
-         return 0;
+         tripal_cv_obo_quiterror("Failed to update the term: $name");
       }  
-      print "Updated CV term: $id\n";
-      $cvterm = db_fetch_object(db_query($cvtermsql,$name,$cv->cv_id));         
+      $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));         
+      if(!$is_relationship){
+         print "Updated CV term: $name ($dbname)\n";
+      } else {
+         print "Updated relationship CV term: $name ($dbname)\n";
+      }
    }
    // return the cvterm
    return $cvterm;
@@ -444,8 +635,14 @@ function tripal_cv_obo_add_cv_term($term,$cv,$is_relationship = 0,$update = 1){
 */
 function tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref){
 
-   $accession = preg_replace('/^.+?:(.*)$/','\1',$xref);
-   $dbname = preg_replace('/^(.+?):.*$/','\1',$xref);
+   $dbname = preg_replace('/^(.+?):.*$/','$1',$xref);
+   $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/','$1',$xref);
+   $description = preg_replace('/^.+?\"(.+?)\".*?$/','$1',$xref);
+   $dbxrefs = preg_replace('/^.+?\[(.+?)\].*?$/','$1',$xref);
+
+   if(!$accession){
+      tripal_cv_obo_quiterror("Cannot add a dbxref without an accession: '$xref'");
+   }
 
    // if the xref is a database link, handle that specially
    if(strcmp($dbname,'http')==0){
@@ -456,13 +653,14 @@ function tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref){
    // check to see if the database exists
    $db = tripal_cv_obo_add_db($dbname);
    if(!$db){
-      print "Cannot find database '$dbname' in Chado.";
-      return 0;
+      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }
 
    // now add the dbxref
-   $dbxref = tripal_cv_obo_add_dbxref($db->db_id,$accession);
-   if(!$dbxref){ return 0;}
+   $dbxref = tripal_cv_obo_add_dbxref($db->db_id,$accession,'',$description);
+   if(!$dbxref){ 
+      tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
+   }
 
    // finally add the cvterm_dbxref but first check to make sure it exists
    $sql = "SELECT * from {cvterm_dbxref} WHERE cvterm_id = %d and dbxref_id = %d";
@@ -470,8 +668,7 @@ function tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref){
       $sql = "INSERT INTO {cvterm_dbxref} (cvterm_id,dbxref_id)".
              "VALUES (%d,%d)";
       if(!db_query($sql,$cvterm->cvterm_id,$dbxref->dbxref_id)){
-         print "Cannot add cvterm_dbxref: $accession\n";
-         return 0;
+         tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
       }
    }
    return 1;
@@ -490,9 +687,9 @@ function tripal_cv_obo_add_dbxref($db_id,$accession,$version='',$description='')
          VALUES (%d,'%s','%s','%s')
       ";
       if(!db_query($sql,$db_id,$accession,$version,$description)){
-         print "Failed to insert the dbxref record $accession\n";
-         return 0;
+         tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
       }
+      print "Added Dbxref accession: $accession\n";
       $dbxref = db_fetch_object(db_query($dbxsql,$db_id,$accession));
    }
    return $dbxref;
@@ -570,11 +767,4 @@ function tripal_cv_obo_parse($obo_file,&$obo,&$header){
    }
 }
 
-/*************************************************************************
-*
-*/
-function tripal_cv_obo_loader_done (){
-   // return the search path to normal
-   db_query("set search_path to public");  
-   return '';
-}
+

+ 30 - 15
tripal_cv/tripal_cv.install

@@ -43,7 +43,7 @@ function tripal_cv_install(){
 
    // create the tables that correlate OBO files/references with a chado CV
    drupal_install_schema('tripal_cv');
-
+   tripal_cv_add_obo_defaults();
 }
 /*******************************************************************************
 *  This update adds the new tripal_obo table.  This is an upgrade from
@@ -52,19 +52,13 @@ function tripal_cv_install(){
 
 function tripal_cv_update_6000(){
    drupal_install_schema('tripal_cv');
+   tripal_cv_add_obo_defaults();
    $ret = array(
       '#finished' => 1,
    );
    
    return $ret;
 }
-/************************************************************************
-* Implementation of hook_schema().
-*/
-function tripal_cv_schema() {
-   $schema = tripal_cv_get_schemas();
-   return $schema;
-}
 /*******************************************************************************
 * Implementation of hook_uninstall()
 */
@@ -97,6 +91,13 @@ function tripal_cv_requirements($phase) {
    return $requirements;
 }
 /************************************************************************
+* Implementation of hook_schema(): returns tripal_cv_get_schemas().
+*/
+function tripal_cv_schema() {
+   $schema = tripal_cv_get_schemas();
+   return $schema;
+}
+/************************************************************************
 * This function simply defines all tables needed for the module to work
 * correctly.  By putting the table definitions in a separate function we 
 * can easily provide the entire list for hook_install or individual
@@ -105,17 +106,31 @@ function tripal_cv_requirements($phase) {
 function tripal_cv_get_schemas (){  
   $schema = array();
 
-  $schema['tripal_obo'] = array(
+  $schema['tripal_cv_obo'] = array(
       'fields' => array(
-         'cv_id' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0),
-         'file'  => array('type' => 'varchar','length' => 1024),
-         'url'   => array('type' => 'varchar','length' => 1024),
+         'obo_id' => array('type' => 'serial', 'unsigned' => TRUE, 'not null' => TRUE),
+         'name'  => array('type' => 'varchar','length' => 255),
+         'path'   => array('type' => 'varchar','length' => 1024),
       ),
       'indexes' => array(
-         'cv_id' => array('cv_id')
+         'obo_id' => array('obo_id'),
        ),
-      'primary key' => array('cv_id'),
+      'primary key' => array('obo_id'),
   );
-
   return $schema;
 }
+/************************************************************************
+* Adds the default OBO references (a display name plus a local path or
+* remote URL for commonly used ontologies) to the tripal_cv_obo table.
+*/
+function tripal_cv_add_obo_defaults(){
+   // the {} around the table name lets Drupal apply any table prefix
+   $isql = "INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')";
+   db_query($isql,'Chado Feature Properties',drupal_get_path('module','tripal_cv').'/feature_property.obo');
+   db_query($isql,'Relationship Ontology','http://www.obofoundry.org/ro/ro.obo');
+   db_query($isql,'Sequence Ontology','http://song.cvs.sourceforge.net/*checkout*/song/ontology/so.obo');
+   db_query($isql,'Gene Ontology','http://www.geneontology.org/ontology/gene_ontology.obo');
+   db_query($isql,'Cell Ontology','http://obo.cvs.sourceforge.net/obo/obo/ontology/anatomy/cell_type/cell.obo?rev=HEAD');
+   db_query($isql,'Plant Structure Ontology','http://palea.cgrb.oregonstate.edu/viewsvn/Poc/trunk/ontology/OBO_format/po_anatomy.obo?view=co');  
+   db_query($isql,'Plant Growth and Development Stages Ontology','http://palea.cgrb.oregonstate.edu/viewsvn/Poc/trunk/ontology/OBO_format/po_temporal.obo?view=co');
+}

+ 105 - 27
tripal_cv/tripal_cv.module

@@ -53,7 +53,7 @@ function tripal_cv_menu() {
      'type' => MENU_NORMAL_ITEM,
    );
    $items['admin/tripal/tripal_cv/obo'] = array(
-     'title' =>'Upload an OBO File',
+     'title' =>'Add/Update Ontology With OBO File',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('tripal_cv_obo_form'),
      'access arguments' => array('access administration pages'),
@@ -179,9 +179,9 @@ function tripal_cv_admin_page(){
    $cvtermpath_url = url("admin/tripal/tripal_cv/cvtermpath");
    $browser_url = url("cv_browser");
    $output = "<a href=\"$add_url\">Add a new controlled vocabulary</a> | ";
-   $output = "<a href=\"$obo_url\">Upload an OBO file</a> | ";
-   $output .= "<a href=\"$cvtermpath_url\">Update the cvtermpath table</a> | ";
-   $output .= "<a href=\"$browser_url\">Browse a vocabulary</a> ";    
+   $output .= "<a href=\"$browser_url\">Browse a vocabulary</a> | ";    
+   $output .= "<a href=\"$obo_url\">Add/Update Ontology With OBO File</a> | ";
+   $output .= "<a href=\"$cvtermpath_url\">Update the cvtermpath table</a> ";
    $output .= drupal_get_form('tripal_cv_select_form');
    $output .= '<div id="db-edit-div">Please select a vocabulary above to view or edit</div>';
    return $output;
@@ -275,35 +275,99 @@ function tripal_cv_add_form(&$form_state = NULL){
 function tripal_cv_obo_form(&$form_state = NULL){
 
 
-   $form['obo_url']= array(
+	// get the list of saved OBO references (tripal_cv_obo) for the user to choose from
+	$sql = "SELECT * FROM {tripal_cv_obo} ORDER BY obo_id";
+	$results = db_query ($sql);
+
+
+	$obos = array();
+   $obos[] = '';
+	while ($obo = db_fetch_object($results)){
+		$obos[$obo->obo_id] = "$obo->name  | $obo->path";
+	}
+
+
+   $form['instructions']= array(
+      '#value' => t('Use this interface to upload a controlled vocabulary in OBO format.  A new vocabulary and its 
+                     associated terms will be added to the database the first time an OBO is uploaded.  Each
+                     subsequent upload will update the terms.  The format of the OBO should be in v1.2 format,
+                     however, this loader does support some older styles formats. '),
+      '#weight'        => -2
+   );
+
+   $form['obo_existing'] = array(
+      '#type' =>'fieldset',
+      '#title' => t('Use a Saved Ontology OBO Reference')
+   );
+
+   $form['obo_new'] = array(
+      '#type' =>'fieldset',
+      '#title' => t('Use a New Ontology OBO Reference')
+   );
+
+
+   $form['obo_existing']['existing_instructions']= array(
+      '#value' => t('The Ontology OBO files listed in the drop down below have been automatically added upon 
+                     installation of the Tripal CV module or were added from a previous upload.  Select
+                     an OBO, then click the submit button to load the vocabulary into the database.  If the
+                     vocabularies already exist then the ontology will be updated.  Note that the name of the 
+                     ontology provided here may not match the namespace of the vocabulary.  For instance,
+                     the Gene Ontology will add three vocabularies: biological_process, cellular_component, and 
+                     molecular_function.'),
+      '#weight'        => -1
+   );
+
+	$form['obo_existing']['obo_id'] = array(
+      '#title' => t('Ontology OBO File Reference'),
+      '#type' => 'select',
+      '#options' => $obos,
+      '#weight'        => 0
+	);
+
+   $form['obo_new']['path_instructions']= array(
+      '#value' => t('Provide the name and path for the OBO file.  If the vocabulary OBO file 
+                     is stored local to the server provide a file name. If the vocabulry is stored remotely, 
+                     provide a URL.  Only provide a URL or a local file, not both.'),
+      '#weight'        => 0
+   );
+
+   $form['obo_new']['obo_name']= array(
+      '#type'          => 'textfield',
+      '#title'         => t('New Vocabulary Name'),
+      '#description'   => t('Please provide a name for this vocabulary.  After upload, this name will appear in the drop down
+                             list above for use again later.'),
+      '#weight'        => 1
+   );
+
+
+
+   $form['obo_new']['obo_url']= array(
       '#type'          => 'textfield',
-      '#title'         => t('Remote OBO v1.2 Definition File'),
-      '#description'   => t('Please enter a URL the points to online OBO file.  The file will be downloaded and parsed. 
-                             All necessary information for naming of the conrolled vocabulary is contained in the file.
-                             If the specification file contains more than one vocabulary then each will be created.'),
+      '#title'         => t('Remote URL'),
+      '#description'   => t('Please enter a URL for the online OBO file.  The file will be downloaded and parsed.
+                             (e.g. http://www.obofoundry.org/ro/ro.obo'),
       '#default_value' => $default_desc,
-      '#weight'        => -1
+      '#weight'        => 2
    );
-   $form['obo_file']= array(
+   $form['obo_new']['obo_file']= array(
       '#type'          => 'textfield',
-      '#title'         => t('Local OBO v1.2 Definition File'),
-      '#description'   => t('Please enter the full path for an OBO definition file.  The path must be accessible to the
-                             server on which this Drupal instance is running.  All necessary information for naming of the
-                             controlled vocabulary is contained in this file.  If the specification file contains more than 
-                             one vocabulary then each will be created.'),
+      '#title'         => t('Local File'),
+      '#description'   => t('Please enter the full system path for an OBO definition file, or a path within the Drupal
+                             installation (e.g. /sites/default/files/xyz.obo).  The path must be accessible to the
+                             server on which this Drupal instance is running.'),
       '#default_value' => $default_desc,
-      '#weight'        => 0
+      '#weight'        => 3
    );
 
 
-   $form['upload'] = array (
+   $form['submit'] = array (
      '#type'         => 'submit',
-     '#value'        => t('Upload'),
+     '#value'        => t('Submit'),
      '#weight'       => 5,
      '#executes_submit_callback' => TRUE,
    );
 
-   $form['#redirect'] = 'admin/tripal/tripal_cv';
+   $form['#redirect'] = 'admin/tripal/tripal_cv/obo';
 
 
    return $form;
@@ -451,17 +515,31 @@ function tripal_cv_add_form_submit($form, &$form_state){
 function tripal_cv_obo_form_submit($form, &$form_state){
    global $user;
 
+   $obo_id =  $form_state['values']['obo_id'];
+   $obo_name =  $form_state['values']['obo_name'];
    $obo_url =  $form_state['values']['obo_url'];
    $obo_file  = $form_state['values']['obo_file'];
 
-   if($obo_url){ 
-   }
-   elseif($obo_file){
-      $args = array($obo_file);
-      tripal_add_job("Load OBO $file",'tripal_cv',
-         "tripal_cv_load_obo_v1_2",$args,$user->uid);
+   $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
+   $obo = db_fetch_object(db_query($sql,$obo_id));
+
+   if($obo_id){ 
+      $args = array($obo_id);
+      tripal_add_job("Load OBO $obo->name",'tripal_cv',
+         "tripal_cv_load_obo_v1_2_id",$args,$user->uid);
    } 
- 
+   else {
+      if($obo_url){ 
+         $args = array($obo_name,$obo_url);
+         tripal_add_job("Load OBO $obo_name",'tripal_cv',
+            "tripal_cv_load_obo_v1_2_url",$args,$user->uid);
+      }
+      elseif($obo_file){
+         $args = array($obo_name,$obo_file);
+         tripal_add_job("Load OBO $obo_name",'tripal_cv',
+            "tripal_cv_load_obo_v1_2_file",$args,$user->uid);
+      } 
+   }
 
    return '';
 }