Browse Source

Added several configurable options to allow end users to specify how duplicate publications are found

spficklin 12 years ago
parent
commit
dacb3542df

+ 23 - 1
tripal_cv/api/tripal_cv.api.inc

@@ -182,7 +182,29 @@ function tripal_cv_get_cv_options() {
   return $options;
 
 }
-
+/**
+ * Retrieve a single chado cvterm record by its primary key.
+ *
+ * @param $cvterm_id
+ *   The cvterm.cvterm_id of the term to look up. Non-numeric values are
+ *   rejected without querying the database.
+ *
+ * @return
+ *   The first record returned by tripal_core_chado_select() for the given
+ *   cvterm_id (all columns of the cvterm table are requested), or FALSE if
+ *   $cvterm_id is not numeric or the select yields nothing.
+ *
+ * @ingroup tripal_cv_api
+ */
+function tripal_cv_get_cvterm_by_id($cvterm_id) {
+  if (is_numeric($cvterm_id)) {
+    $matches = tripal_core_chado_select(
+      'cvterm',
+      array('*'),
+      array('cvterm_id' => $cvterm_id),
+      array('statement_name' => 'sel_cvterm_id')
+    );
+    // An empty or FALSE result falls through to the shared FALSE return.
+    if ($matches) {
+      return $matches[0];
+    }
+  }
+  return FALSE;
+}
 /**
  * Retrieve a chado cvterm object with a given name
  *

+ 12 - 3
tripal_pub/api/tripal_pub.api.inc

@@ -506,9 +506,11 @@ function tripal_pub_get_pubs_by_dbxref($pub_dbxref) {
  *   The title of the publication to look for
  * @param type
  *   Optional. The publication type. The value of this field should come from 
- *   the Tripal Pub vocabulary
+ *   the Tripal Pub vocabulary. This should be the type name (e.g. cvterm.name)
  * @param year
  *   Optional. The year the publication was published.
+ * @param series_name
+ *   Optional.  The name of the series (e.g. Journal name)
  * 
  * @return
  *   Returns an array of all the publications that have the provided
@@ -518,7 +520,7 @@ function tripal_pub_get_pubs_by_dbxref($pub_dbxref) {
  * @ingroup tripal_pub_api 
  * 
  */
-function tripal_pub_get_pubs_by_title_type_pyear($title, $type = NULL, $pyear = NULL) {
+function tripal_pub_get_pubs_by_title_type_pyear_series($title, $type = NULL, $pyear = NULL, $series_name = NULL) {
   
   $return = array();
   
@@ -540,7 +542,14 @@ function tripal_pub_get_pubs_by_title_type_pyear($title, $type = NULL, $pyear =
     $values['pyear'] = $pyear;
     $stmnt_suffix .= 'py';
   }
-  $options = array('statement_name' => 'sel_pub_' . $stmnt_suffix);
+  if ($series_name) {
+    $values['series_name'] = strtolower($series_name);
+    $stmnt_suffix .= 'se';
+  }
+  $options = array(
+    'statement_name' => 'sel_pub_' . $stmnt_suffix,
+    'case_insensitive_columns' => array('title', 'series_name'),
+  );
   $results = tripal_core_chado_select('pub', array('pub_id'), $values, $options);
   
   // iterate through any matches and pull out the pub_id

+ 90 - 33
tripal_pub/includes/pub_form.inc

@@ -144,7 +144,7 @@ function chado_pub_form($node, $form_state) {
   $num_properties += chado_pub_node_form_add_pubprop_table_props($form, $form_state, $pub_id, $d_properties, $d_removed);
     
   // add in any new properties that have been added by the user through an AHAH callback  
-  $num_properties += chado_pub_node_form_add_new_props($form, $form_state, $num_new, $d_properties, $d_removed);
+  $num_new = chado_pub_node_form_add_new_props($form, $form_state, $d_properties, $d_removed);
   
   // add an empty row of field to allow for addition of a new property
   chado_pub_node_form_add_new_empty_props($form, $properties_select);
@@ -159,6 +159,10 @@ function chado_pub_form($node, $form_state) {
     '#type' => 'hidden',
     '#value' => $num_new,
   );
+  $form['num_properties'] = array(
+    '#type' => 'hidden',
+    '#value' => $num_properties,
+  );
   
   $form['is_obsolete'] = array(
     '#type' => 'checkbox',
@@ -189,7 +193,9 @@ function chado_pub_validate($node, &$form) {
   $pubplace     = trim($node->pubplace);
   $is_obsolete  = $node->is_obsolete;
   $pub_id       = $node->pub_id;
-
+  $num_properties = $node->num_properties;
+  $num_new = $node->num_new;
+  
   // if this is a delete then don't validate
   if($node->op == 'Delete') {
     return;
@@ -210,44 +216,95 @@ function chado_pub_validate($node, &$form) {
     form_set_error('type_id', $message);
     return;
   }
-
-  // on an insert (no $pub_id) make sure the publication doesn't already exist
-  if (!$pub_id) {
-    $results = tripal_pub_get_pubs_by_title_type_pyear($title, $cvterm[0]->name, $pyear);   
-    if (count($results) > 0) {
-      $message = t('A publication with this title, type and publication year, already exists.  Cannot add this publication');
-      form_set_error('pyear', $message);
-      return;
+  
+  // get the media name looking at the properties
+  foreach ($node as $element => $value) {
+    // if this is an existing property (either previously in the database or
+    // added via AHAH/AJAX callback)
+    if (preg_match('/^prop_value-(\d+)-(\d+)$/', $element, $matches)) {
+      $prop_type_id = $matches[1];
+      $prop_type = tripal_cv_get_cvterm_by_id($prop_type_id);
+      if($prop_type->name == 'Conference Name' or $prop_type->name == 'Journal Name') {
+        $series_name = $value;
+      }    
+    }
+    // if this is a new property (added by this submit of the form)
+    elseif ($element = 'new_id') {
+      $prop_type = tripal_cv_get_cvterm_by_id($value);
+      if($prop_type->name == 'Conference Name' or $prop_type->name == 'Journal Name') {
+        $series_name = $node->new_value;
+      }  
     }
   }
-  
-  // on an update, make sure that if the title has changed that it doesn't
-  // conflict with any other publication
+ 
+  // on an update ($pub_id is set), check to see if there have been  changes to fields that
+  // are used to check for duplicates. If not, then no need to check for duplicates
+  $skip_duplicate_check = 0; 
   if ($pub_id) {
     // first get the original title, type and year before it was changed
     $values = array('pub_id' => $pub_id);
-    $columns =  array('title','pyear','type_id');
+    $columns =  array('title', 'pyear', 'type_id', 'series_name');
     $options = array('statement_name' => 'sel_pub_id');
-    $pub = tripal_core_chado_select('pub', $columns, $values, $options);
-    
-    // if the title or year doesn't match then it was changed and we want to make
-    // sure it doesn't already exist in another publication
-    if((strcmp($pub[0]->title, $title) != 0) or
-       ($pub[0]->type_id != $type_id) or
-       ($pub[0]->year != $pyear)) {
-      $results = tripal_pub_get_pubs_by_title_type_pyear($title, $cvterm[0]->name, $pyear);
+    $pub = tripal_core_chado_select('pub', $columns, $values, $options);       
 
-      // make sure we don't capture our pub_id in the list (remove it)
-      foreach ($results as $index => $found_pub_id) {
-        if($found_pub_id == $pub_id){
-          unset($results[$index]);
-        }
-      }         
-      if (count($results) > 0) {
-        $message = t('A publication with this title and publication year, already exists.  Cannot update this publication'); 
-        form_set_error('pyear', $message);
-      }
+    // if the title, type,  year or series_name have changed then check the pub 
+    // to see if it is a duplicate of another
+    if((strcmp(strtolower($pub[0]->title), strtolower($title)) == 0) and
+       (strcmp(strtolower($pub[0]->series_name), strtolower($series_name)) == 0) and
+       ($pub[0]->type_id == $type_id) and
+       ($pub[0]->year == $pyear)) {
+       $skip_duplicate_check = 1;
     }
+  }
+  
+  // check to see if a duplicate publication already exists
+  if (!$skip_duplicate_check) {  
+
+    // make sure the publication is unique using the preferred import duplication check
+    $import_dups_check = variable_get('tripal_pub_import_duplicate_check', 'title_year');
+    switch ($import_dups_check) {            
+      case 'title_year':
+        $results = tripal_pub_get_pubs_by_title_type_pyear_series($title, NULL, $pyear, NULL);
+        // make sure we don't capture our pub_id in the list (remove it)
+        foreach ($results as $index => $found_pub_id) {
+          if($found_pub_id == $pub_id){
+            unset($results[$index]);
+          }
+        }         
+        if (count($results) > 0) {
+          $message = t('A publication with this title and publication year, already exists.'); 
+          form_set_error('pyear', $message);
+        }
+        break;        
+      case 'title_year_type':          
+        $results = tripal_pub_get_pubs_by_title_type_pyear_series($title, $cvterm[0]->name, $pyear, NULL);
+    
+        // make sure we don't capture our pub_id in the list (remove it)
+        foreach ($results as $index => $found_pub_id) {
+          if($found_pub_id == $pub_id){
+            unset($results[$index]);
+          }
+        }         
+        if (count($results) > 0) {
+          $message = t('A publication with this title, type and publication year, already exists.'); 
+          form_set_error('pyear', $message);
+        }
+        break;        
+      case 'title_year_media':          
+        $results = tripal_pub_get_pubs_by_title_type_pyear_series($title, NULL, $pyear, $series_name);
+    
+        // make sure we don't capture our pub_id in the list (remove it)
+        foreach ($results as $index => $found_pub_id) {
+          if($found_pub_id == $pub_id){
+            unset($results[$index]);
+          }
+        }         
+        if (count($results) > 0) {
+          $message = t('A publication with this title, media name (e.g. Journal Name) and publication year, already exists.'); 
+          form_set_error('pyear', $message);
+        }
+        break;
+    }      
   } 
 }
 /*
@@ -289,7 +346,7 @@ function chado_pub_node_form_add_new_empty_props(&$form, $properties_select) {
 /*
  * 
  */
-function chado_pub_node_form_add_new_props(&$form, $form_state, $num_new, &$d_properties, &$d_removed) {
+function chado_pub_node_form_add_new_props(&$form, $form_state, &$d_properties, &$d_removed) {
    
   // first, add in all of the new properties that were added through a previous AHAH callback 
   $j = 0;

+ 2 - 0
tripal_pub/includes/pub_sync.inc

@@ -8,6 +8,8 @@ function tripal_pub_sync_form() {
     '#type' => 'item',
     '#value' => t('Syncing a publication will create a Drupal page for every publicatoin record in the Chado database. Click the button below to sync all publications in Chado that currently are not already synced with Drupal.'),
   );
+  
+
 
   $form['submit'] = array(
     '#type' => 'submit',

+ 28 - 1
tripal_pub/includes/tripal_pub.admin.inc

@@ -19,6 +19,7 @@ function tripal_pub_admin() {
   // add the field set for syncing publications
   if (!$active_jobs) {
     get_tripal_pub_admin_form_select_search_list($form);    
+    get_tripal_pub_admin_form_importing_set($form);
     get_tripal_pub_admin_form_cleanup_set($form);
   }
   else {
@@ -33,7 +34,30 @@ function tripal_pub_admin() {
 
   return system_settings_form($form);
 }
-
+/**
+ * Adds the "Import Settings" fieldset to the publication admin form.
+ *
+ * The fieldset holds one radio group that lets the administrator choose
+ * which combination of fields identifies a duplicate publication. The
+ * default selection comes from the 'tripal_pub_import_duplicate_check'
+ * variable and falls back to 'title_year' when that variable is unset.
+ *
+ * @param $form
+ *   The form array, passed by reference. An 'import' fieldset containing the
+ *   'import_duplicate_check' radios element is added to it.
+ *
+ * @ingroup tripal_pub
+ */
+function get_tripal_pub_admin_form_importing_set(&$form) {
+  $form['import'] = array(
+    '#type' => 'fieldset',
+    '#title' => t('Import Settings'),
+  );
+  $form['import']['import_duplicate_check'] = array(
+    '#type' => 'radios',
+    '#title' => t('Unique Constraint'),
+    // Option keys are matched by the switch in chado_pub_validate(); keep
+    // the two in sync when adding or renaming a strategy.
+    '#options' => array(
+      'title_year' => t('Title and Year'),
+      'title_year_media' => t('Title, Year, Media name (e.g. Journal Name, etc.)'),
+      'title_year_type' => t('Title, Year, Media type (e.g. Journal, Conference Proceedings, etc.)'),
+    ),
+    '#description' => t('During import, Tripal will attempt to find duplicate publications. 
+       There are several options for how to find a duplicate publication.  Choose the
+       option that best suits your needs.'),
+    '#default_value' => variable_get('tripal_pub_import_duplicate_check', 'title_year'),
+  );
+}
 
 /**
  *
@@ -115,6 +139,9 @@ function tripal_pub_admin_validate($form, &$form_state) {
     }  
   }
   variable_set('tripal_pub_allowed_search_fields', $allowed_fields);
+  
+  $import_duplicate_check = $form_state['values']['import_duplicate_check'];
+  variable_set('tripal_pub_import_duplicate_check', $import_duplicate_check);
     
   // -------------------------------------
   // Submit the Cleanup Job if selected  

+ 7 - 3
tripal_pub/tripal_pub.module

@@ -378,6 +378,10 @@ function chado_pub_insert($node) {
         $publisher = $value;
         unset($properties[$name]);
       }
+      elseif ($name == "Journal Name" or $name == "Conference Name") {
+        $node->series_name = $value;
+        unset($properties[$name]);
+      }
       elseif ($name == "Journal Country" or $name == "Published Location") {
         $pubplace = $value;
         unset($properties[$name]);
@@ -553,8 +557,8 @@ function chado_pub_update($node) {
         $publisher = $value;
         unset($properties[$name]);
       }
-      elseif ($name == "Journal Name") {
-        $series_name = $value;
+      elseif ($name == "Journal Name" or $name == "Conference Name") {
+        $node->series_name = $value;
         unset($properties[$name]);
       }
       elseif ($name == "Journal Country" or $name == "Published Location") {
@@ -579,7 +583,7 @@ function chado_pub_update($node) {
     'pyear'       => trim($node->pyear),
     'is_obsolete' => $node->is_obsolete ? 'true' : 'false', 
     'uniquename'  => trim($node->uniquename),
-    'series_name' => $series_name,
+    'series_name' => trim($node->series_name),
     'volumetitle' => $volumetitle,
     'volume'      => $volume,    
     'issue'       => $issue,