Browse Source

Fixed bug in importing PMID

spficklin 12 years ago
parent
commit
debf07c070

+ 123 - 37
tripal_pub/api/tripal_pub.api.inc

@@ -330,38 +330,132 @@ function tripal_pub_add_pub_dbxref($pub_id, $pub_dbxref) {
   }
   return $results;
 }
-/*
+/**
+ * Returns the list of publications that are assigned the database
+ * cross-reference provided
+ * 
+ * @param $pub_dbxref
+ *   The database cross reference accession.  It should be in the form
+ *   DB:ACCESSION, where DB is the database name and ACCESSION is the 
+ *   unique publication identifier (e.g. PMID:4382934)
+ * 
+ * @return
+ *   Returns an array of all the publications that have the provided
+ *   cross reference. If no publications match, then an empty array
+ *   is returned.
+ *   
+ * @ingroup tripal_pub_api 
  * 
  */
-function tripal_pub_add_publication($pub_details, $do_contact, $update = FALSE) {
+function tripal_pub_get_pubs_by_dbxref($pub_dbxref) {
+
+  // Split the cross-reference into its database name and accession parts.
+  // A value that is not in DB:ACCESSION form cannot match any publication.
+  $parts = array();
+  if (!preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $parts)) {
+    return array();
+  }
+
+  // Select the pub_dbxref records whose dbxref has this accession within
+  // the named database.
+  $criteria = array(
+    'dbxref_id' => array(
+      'accession' => $parts[2],
+      'db_id' => array(
+        'name' => $parts[1],
+      ),
+    ),
+  );
+  $select_options = array('statement_name' => 'sel_pubdbxref_db');
+  $records = tripal_core_chado_select('pub_dbxref', array('pub_id'), $criteria, $select_options);
+
+  // Keep only the publication IDs of the matching records.
+  $pub_ids = array();
+  foreach ($records as $record) {
+    $pub_ids[] = $record->pub_id;
+  }
+  return $pub_ids;
+}
+/**
+ * Returns the list of publications that match a given title, type and
+ * (optionally) publication year.
+ *
+ * @param $title
+ *   The title of the publication to look for
+ * @param $type
+ *   The publication type. The value of this field should come from
+ *   the Tripal Pub vocabulary
+ * @param $pyear
+ *   Optional. The year the publication was published. When empty, the
+ *   year is not used to restrict the search.
+ *
+ * @return
+ *   Returns an array of the pub_id values of all the publications that
+ *   match the provided title, type and (when given) year. If no
+ *   publications match, then an empty array is returned.
+ *
+ * @ingroup tripal_pub_api
+ *
+ */
+function tripal_pub_get_pubs_by_title_type_pyear($title, $type, $pyear = '') {
+
+  $return = array();
+
+  // build the values array for the query.  The $pyear is not required.
+  $values = array(
+    'title' => $title,
+    'type_id' => array(
+      'name' => $type,
+      'cv_id' => array(
+        'name' => 'tripal_pub'
+      )
+    )
+  );
+
+  // The statement name must differ depending on whether the year is part of
+  // the query, hence the suffix.  The check must be on the $pyear argument:
+  // no $pub_details array exists in this function's scope.
+  $stmnt_suffix = 'tity';
+  if ($pyear) {
+    $values['pyear'] = $pyear;
+    $stmnt_suffix .= 'py';
+  }
+  $options = array('statement_name' => 'sel_pub_' . $stmnt_suffix);
+  $results = tripal_core_chado_select('pub', array('pub_id'), $values, $options);
+
+  // iterate through any matches and pull out the pub_id
+  foreach ($results as $index => $pub) {
+    $return[] = $pub->pub_id;
+  }
+  return $return;
+}
+/**
+ * Adds a new publication to the Chado, along with all properties and
+ * database cross-references. If the publication does not already exist
+ * in Chado then it is added.  If it does exist nothing is done.  If
+ * the $update parameter is TRUE then the publication is updated if it exists.
+ *
+ * @param $pub_details
+ *   An associative array containing all of the details about the publication.  
+ * @param $do_contact
+ *   Optional. Set to TRUE if a contact entry should be added to the Chado contact table
+ *   for authors of the publication.
+ * @param $update
+ *   Optional.  If the publication already exists then this function will return
+ *   without adding a new publication.  However, set this value to TRUE to force
+ *   the function to update the publication using the $pub_details that are provided.
+ *   
+ * @return
+ *   On successful addition of the publication, the new publication ID is returned. If
+ *   the publication already exists but $update is FALSE then TRUE is returned indicating
+ *   that the publication is there already.  If $update is TRUE and the publication
+ *   exists then the publication ID is returned.
+ *   
+ */
+function tripal_pub_add_publication($pub_details, $do_contact = FALSE, $update = FALSE) {
   
   $pub_id = 0;
 
   // first try to find the publication using the accession number. It will have
   // one if the pub has already been loaded for the publication database
   if ($pub_details['Publication Dbxref']) {
-    if(preg_match('/^(.*?):(.*?)$/', $pub_details['Publication Dbxref'], $matches)) {
-      $dbname = $matches[1];
-      $accession = $matches[2];
-    
-      $values = array(
-        'dbxref_id' => array (
-          'accession' => $accession,
-          'db_id' => array(
-            'name' => $dbname
-          ),
-        ),
-      );
-      $options = array('statement_name' => 'sel_pubdbxref_db');
-      $results = tripal_core_chado_select('pub_dbxref', array('pub_id'), $values, $options);
-      if(count($results) == 1) {
-        $pub_id = $results[0]->pub_id;     
-      }
-      elseif(count($results) > 1) {
-        watchdog('tripal_pub', "There are two publications with this accession: %db:%accession. Cannot determine which to update.", 
-          array('%db' => $dbname, '%accession' => $accession), WATCHDOG_ERROR);     
-        return FALSE;    
-      }
+    $results = tripal_pub_get_pubs_by_dbxref($pub_details['Publication Dbxref']);       
+    if(count($results) == 1) {
+      $pub_id = $results[0];     
+    }
+    elseif(count($results) > 1) {
+      watchdog('tripal_pub', "There are two publications with this accession: %db:%accession. Cannot determine which to update.", 
+        array('%db' => $dbname, '%accession' => $accession), WATCHDOG_ERROR);     
+      return FALSE;    
     }
     // If we found the publication and we do not want to do the update then
     // return true to indicate the publication has been added
@@ -373,22 +467,14 @@ function tripal_pub_add_publication($pub_details, $do_contact, $update = FALSE)
   // yet exist or it has been added using a different publication database) then
   // try to find it using the title and publication year.
   elseif ($pub_details['Title']) {
-    $values = array();
-    $values['title'] = $pub_details['Title'];
-    $stmnt_suffix = 'ti';
-    if ($pub_details['Year']) {
-      $values['pyear'] = $pub_details['Year'];
-      $stmnt_suffix .= 'py';
-    }
-    $options = array('statement_name' => 'sel_pub_');
-    $results = tripal_core_chado_select('pub', array('*'), $values, $options);
-
+    $results = tripal_pub_get_pubs_by_title_type_pyear($pub_details['Title'], $pub_details['Publication Type'], $pub_details['Year']);    
     if (count($results) == 1) {
-      $pub_id = $results[0]->pub_id;
+      $pub_id = $results[0];
     }
     elseif (count($results) > 1) {
-      watchdog('tripal_pub', "The publication with the same title is present multiple times. Cannot ".
-        "determine which to use.  Title: %title", array('%title' => $pub_details['Title']), WATCHDOG_ERROR);     
+      watchdog('tripal_pub', "The publication with the same title, type and year is present multiple times. Cannot ".
+        "determine which to use.  Title: '%title'. Type: '%type'. Year: '%year'", 
+        array('%title' => $pub_details['Title'], '%type' => $pub_details['Publication Type'], '%year' => $pub_details['Year']), WATCHDOG_ERROR);     
       return FALSE;          
     }
     // If we found the publication and we do not want to do the update then

+ 77 - 1
tripal_pub/includes/pub_form.inc

@@ -171,6 +171,82 @@ function chado_pub_form($node, $form_state) {
   return $form;
 
 }
+/**
+ * Validates the values submitted for a publication node.
+ *
+ * Presumably the Drupal hook_validate() implementation for chado_pub
+ * nodes (confirm against the module's node type definition).
+ *
+ * Nothing is checked when the operation is a delete.  Otherwise form errors
+ * are set when:
+ *   - the publication year is not exactly four digits,
+ *   - the publication type is not an existing cvterm, or
+ *   - another publication with the same title, type and year already exists.
+ *
+ * @param $node
+ *   The node being validated.  The title, type_id, pyear, pub_id and op
+ *   members are read.
+ */
+function chado_pub_validate($node) {
+
+  // if this is a delete then don't validate
+  if ($node->op == 'Delete') {
+    return;
+  }
+
+  // get the submitted values that take part in the validation
+  $title   = trim($node->title);
+  $type_id = trim($node->type_id);
+  $pyear   = trim($node->pyear);
+  $pub_id  = $node->pub_id;
+
+  // make sure the year is four digits
+  if (!preg_match('/^\d\d\d\d$/', $pyear)) {
+    form_set_error('pyear', t('The publication year should be a 4 digit year.'), array());
+  }
+
+  // get the type of publication
+  $values = array('cvterm_id' => $type_id);
+  $options = array('statement_name' => 'sel_pub_ty');
+  $cvterm = tripal_core_chado_select('cvterm', array('name'), $values, $options);
+  if (count($cvterm) == 0) {
+    form_set_error('type_id', t('Invalided publication type.'), array());
+    return;
+  }
+
+  // on an insert (no $pub_id) make sure the publication doesn't already exist
+  if (!$pub_id) {
+    $results = tripal_pub_get_pubs_by_title_type_pyear($title, $cvterm[0]->name, $pyear);
+    if (count($results) > 0) {
+      form_set_error('pyear',t('A publication with this title, type and publication year, already exists.  Cannot add this publication'), array());
+    }
+    return;
+  }
+
+  // on an update, make sure that if the title, type or year has changed that
+  // it doesn't conflict with any other publication.
+  // first get the original title, type and year before it was changed
+  $values = array('pub_id' => $pub_id);
+  $columns = array('title', 'pyear', 'type_id');
+  $options = array('statement_name' => 'sel_pub_id');
+  $pub = tripal_core_chado_select('pub', $columns, $values, $options);
+
+  // if the stored record could not be retrieved we cannot tell what changed,
+  // so the duplicate check is run anyway
+  $original = $pub ? $pub[0] : NULL;
+
+  // if the title, type or year doesn't match then it was changed and we want
+  // to make sure it doesn't already exist in another publication.  The year
+  // is held in the 'pyear' column (the only year column selected above).
+  if (!$original or
+      (strcmp($original->title, $title) != 0) or
+      ($original->type_id != $type_id) or
+      ($original->pyear != $pyear)) {
+    $results = tripal_pub_get_pubs_by_title_type_pyear($title, $cvterm[0]->name, $pyear);
+
+    // make sure we don't capture our pub_id in the list (remove it)
+    foreach ($results as $index => $found_pub_id) {
+      if ($found_pub_id == $pub_id) {
+        unset($results[$index]);
+      }
+    }
+    if (count($results) > 0) {
+      form_set_error('pyear',t('A publication with this title and publication year, already exists.  Cannot update this publication'), array());
+    }
+  }
+}
 /*
  * 
  */
@@ -657,4 +733,4 @@ function tripal_pub_property_get_description() {
       'data'   => '<div id="tripal-pub-new_value-desc">' . $description . '</div>',
     )    
   );
-}
+}

+ 1 - 3
tripal_pub/includes/pub_importers.inc

@@ -142,9 +142,7 @@ function theme_tripal_pub_importer_setup_form($form) {
  */
 function tripal_pub_importer_setup_form(&$form_state = NULL, $pub_import_id = NULL, $action = 'new') {
   tripal_core_ahah_init_form();
-
-  dpm($form_state);
-  
+ 
   // Set the default values. If the pub_import_id isn't already defined by the form values 
   // and one is provided then look it up in the database
   $criteria = NULL;

+ 12 - 3
tripal_pub/includes/pubmed.inc

@@ -240,8 +240,16 @@ function tripal_pub_PMID_parse_pubxml($pub_xml) {
       
       switch ($element) {
         case 'PMID':
+          // there are multiple places where a PMID is present in the XML and
+          // since this code does not descend into every branch of the XML tree
+          // we will encounter many of them here.  Therefore, we only want the
+          // PMID that we first encounter. If we already have the PMID we will 
+          // just skip it.  Examples of other PMIDs are in the articles that
+          // cite this one.
           $xml->read(); // get the value for this element
-          $pub['Publication Dbxref'] = 'PMID:' . $xml->value;
+          if(!$pub['Publication Dbxref']) {
+            $pub['Publication Dbxref'] = 'PMID:' . $xml->value;
+          }
           break;        
         case 'Article':
           $pub_model = $xml->getAttribute('PubModel');
@@ -298,7 +306,7 @@ function tripal_pub_PMID_parse_pubxml($pub_xml) {
       }
     }
   }
-  $pub['Citation'] = $pub['Authors'] . '. ' . $pub['Title'] .  ' ' .
+  $pub['Citation'] = $pub['Authors'] . '. ' . $pub['Title'] .  '. ' .
     $pub['Journal ISO Abbreviation'] . '. ' . $pub['Publication Date'];
   if ($pub['Volume'] or $pub['Issue']) {
     $pub['Citation'] .= '; ';  
@@ -372,7 +380,8 @@ function tripal_pub_PMID_parse_article($xml, &$pub) {
           break;
         case 'ArticleTitle':
           $xml->read();
-          $pub['Title'] = $xml->value;
+          // remove any trailing period from the title
+          $pub['Title'] = preg_replace('/\.$/', '', $xml->value); 
           break;
         case 'Abstract':
           tripal_pub_PMID_parse_abstract($xml, $pub);

+ 18 - 18
tripal_pub/tripal_pub.module

@@ -309,20 +309,20 @@ function chado_pub_insert($node) {
         $type_id = $matches[1];
         $index = $matches[2];
         $name = $properties_list[$type_id];
-        $properties[$name][$index] = $value;
+        $properties[$name][$index] = trim($value);
       }      
     }
     if ($node->new_id and $node->new_value) {
       $type_id = $node->new_id;
       $index = count($properties[$name]);
       $name = $properties_list[$type_id];
-      $properties[$name][$index] = $node->new_value;    
+      $properties[$name][$index] = trim($node->new_value);    
     } 
     
     // iterate through all of the properties and remove those that really are
     // part of the pub table fields
     foreach ($properties as $name => $element) {
-      $value = $element[0];
+      $value = trim($element[0]);
       if ($name == "Volume") {
         $volume = $value;
         unset($properties[$name]);
@@ -356,12 +356,12 @@ function chado_pub_insert($node) {
     
     // insert the pub record
     $values = array(
-      'title'       => $node->title,
-      'series_name' => $node->series_name,
-      'type_id'     => $node->type_id,
-      'pyear'       => $node->pyear,
+      'title'       => trim($node->title),
+      'series_name' => trim($node->series_name),
+      'type_id'     => trim($node->type_id),
+      'pyear'       => trim($node->pyear),
       'is_obsolete' => $node->is_obsolete ? 'true' : 'false', 
-      'uniquename'  => $node->uniquename,
+      'uniquename'  => trim($node->uniquename),
       'volumetitle' => $volumetitle,
       'volume'      => $volume,    
       'issue'       => $issue,    
@@ -392,7 +392,7 @@ function chado_pub_insert($node) {
     
     // add in any database cross-references
     foreach ($cross_refs as $index => $ref) {
-      $pub_dbxref = tripal_pub_add_pub_dbxref($pub['pub_id'], $ref);
+      $pub_dbxref = tripal_pub_add_pub_dbxref($pub['pub_id'], trim($ref));
       if (!$pub_dbxref) { 
         drupal_set_message("Error cannot add publication cross reference: $ref", "error");
         watchdog('tripal_pub', "Error cannot add publication cross reference: %ref", 
@@ -476,20 +476,20 @@ function chado_pub_update($node) {
       $type_id = $matches[1];
       $index = $matches[2];  
       $name = $properties_list[$type_id];  
-      $properties[$name][$index] = $value;     
+      $properties[$name][$index] = trim($value);     
     }
     if (preg_match('/^new_value-(\d+)-(\d+)/', $key, $matches)) {
       $type_id = $matches[1];
       $index = $matches[2];
       $name = $properties_list[$type_id];
-      $properties[$name][$index] = $value;
+      $properties[$name][$index] = trim($value);
     }      
   }
   if ($node->new_id and $node->new_value) {
     $type_id = $node->new_id;    
     $name = $properties_list[$type_id];
     $index = count($properties[$name]);
-    $properties[$name][$index] = $node->new_value;    
+    $properties[$name][$index] = trim($node->new_value);    
   } 
   
   // iterate through all of the properties and remove those that really are
@@ -533,12 +533,12 @@ function chado_pub_update($node) {
     'pub_id' => $pub_id,
   );
   $values = array(
-    'title'       => $node->title,
-    'series_name' => $node->series_name,
-    'type_id'     => $node->type_id,
-    'pyear'       => $node->pyear,
+    'title'       => trim($node->title),
+    'series_name' => trim($node->series_name),
+    'type_id'     => trim($node->type_id),
+    'pyear'       => trim($node->pyear),
     'is_obsolete' => $node->is_obsolete ? 'true' : 'false', 
-    'uniquename'  => $node->uniquename,
+    'uniquename'  => trim($node->uniquename),
     'volumetitle' => $volumetitle,
     'volume'      => $volume,    
     'issue'       => $issue,    
@@ -571,7 +571,7 @@ function chado_pub_update($node) {
   // add in any database cross-references after first removing 
   tripal_core_chado_delete('pub_dbxref', array('pub_id' => $pub_id));
   foreach ($cross_refs as $index => $ref) {
-    $pub_dbxref = tripal_pub_add_pub_dbxref($pub_id, $ref);
+    $pub_dbxref = tripal_pub_add_pub_dbxref($pub_id, trim($ref));
     if (!$pub_dbxref) { 
       drupal_set_message("Error cannot add publication cross reference: $ref", "error");
       watchdog('tripal_pub', "Error cannot add publication cross reference: %ref",