فهرست منبع

Added a 'no fail' option for records of type 'select' in the bulk loader. Altered the bulk loader to substitute primary key values into the data table values so that there are fewer recursive FK calls. Added a new option to tripal_core_chado_select that lets a caller check whether a record exists using only the unique constraint values.

spficklin 12 سال پیش
والد
کامیت
c3719190b6

+ 50 - 8
tripal_bulk_loader/tripal_bulk_loader.admin.templates.inc

@@ -161,6 +161,13 @@ function tripal_bulk_loader_modify_template_base_form($form_state = NULL, $mode)
         if ($table_array['select_if_duplicate']) {
           $mode_value .= ' or select if duplicate';
         } 
+        if ($table_array['select_optional']) {
+          $mode_value .= ' (no fail)';
+        }
+        if ($table_array['update_if_duplicate']) {
+          $mode_value .= ' or update if duplicate';
+        } 
+        
         
         // add in the disabled
         if ($table_array['disable']) {
@@ -659,18 +666,30 @@ function tripal_bulk_loader_edit_template_record_form(&$form_state = NULL) {
      $mode = 'insert';
   }
   
+  // get default for the select optional
+  $select_optional = $form_state['storage']['template_array'][$form_state['storage']['original_priority']]['select_optional'];
+  if(!isset($select_optional)){
+    $select_optional = 1;
+  }
+  
   // get default for the select if duplicate
   $select_if_duplicate = $form_state['storage']['template_array'][$form_state['storage']['original_priority']]['select_if_duplicate'];
   if(!isset($select_if_duplicate)){
     $select_if_duplicate = 1;
   }
   
+  // get default for the update if duplicate
+  $update_if_duplicate = $form_state['storage']['template_array'][$form_state['storage']['original_priority']]['update_if_duplicate'];
+  if(!isset($update_if_duplicate)){
+    $update_if_duplicate = 0;
+  }
+  
   // get default for the select if duplicate
   $optional = $form_state['storage']['template_array'][$form_state['storage']['original_priority']]['optional'];
   if(!isset($optional)){
     $optional = 0;
   }
-
+  
   // get the default for disabling the record
   $disable = $form_state['storage']['template_array'][$form_state['storage']['original_priority']]['disable'];
 
@@ -746,20 +765,41 @@ function tripal_bulk_loader_edit_template_record_form(&$form_state = NULL) {
     '#default_value' => $mode
   );
   
-  $form['edit_record']['optional'] = array(
+  $form['edit_record']['select_options'] = array(
+     '#type' => 'markup',
+     '#value' => t('Additional Select Options:'),
+  );
+  $form['edit_record']['select_optional'] = array(
      '#type' => 'checkbox',
-     '#title' => t('Optional'),
-     '#description' => t('The insert or select will only be performed if all required data are present'),
-     '#default_value' => $optional
+     '#title' => t('Continue if no record exists.'),
+     '#description' => t('By default if a select does not find a match the loader will fail.  Check here to allow the loader to continue when no match is found.'),
+     '#default_value' => $select_optional
   );
   
+  $form['edit_record']['insert_options'] = array(
+     '#type' => 'markup',
+     '#prefix' => '<br>',
+     '#value' => t('Additional Insert Options:'),
+  );
+    
   $form['edit_record']['select_if_duplicate'] = array(
      '#type' => 'checkbox',
-     '#title' => t('Do INSERT, or SELECT if Duplicate'),
-     '#description' => t('Perform the insert as selected above. If this is the first time this record has been added then perform an insert. Otherwise, perform a select.  The unique constraint for the table will be checked prior to insert.'),
+     '#title' => t('SELECT if duplicate (no insert)'),
+     '#description' => t('If this is not the first time this record has been added then perform a select rather than an insert.'),
      '#default_value' => $select_if_duplicate
   );
-  
+  $form['edit_record']['update_if_duplicate'] = array(
+     '#type' => 'checkbox',
+     '#title' => t('UPDATE if duplicate (no insert)'),
+     '#description' => t('If this is not the first time this record has been added then perform an update rather than an insert.'),
+     '#default_value' => $update_if_duplicate
+  );  
+  $form['edit_record']['optional'] = array(
+     '#type' => 'checkbox',
+     '#title' => t('Optional'),
+     '#description' => t('The insert, update or select will only be performed if all required data are present'),
+     '#default_value' => $optional
+  );  
   $form['edit_record']['disable'] = array(
      '#type' => 'checkbox',
      '#title' => t('Disable this record'),
@@ -803,6 +843,8 @@ function tripal_bulk_loader_edit_template_record_form_submit($form, &$form_state
       $record['mode'] = $form_state['values']['mode'];
       $record['table'] = $form_state['values']['chado_table'];
       $record['select_if_duplicate'] = $form_state['values']['select_if_duplicate'];
+      $record['update_if_duplicate'] = $form_state['values']['update_if_duplicate'];      
+      $record['select_optional'] = $form_state['values']['select_optional'];            
       $record['disable'] = $form_state['values']['disable'];
       $record['optional'] = $form_state['values']['optional'];
 

+ 70 - 16
tripal_bulk_loader/tripal_bulk_loader.loader.inc

@@ -163,9 +163,11 @@ function tripal_bulk_loader_load_data($nid, $job_id) {
 
       $default_data[$priority]['table'] = $record_array['table'];
       $default_data[$priority]['mode'] = ($record_array['mode']) ? $record_array['mode'] : 'insert';
-      $default_data[$priority]['select_if_duplicate'] = ($record_array['select_if_duplicate']) ? $record_array['select_if_duplicate'] : 1;
+      $default_data[$priority]['select_if_duplicate'] = ($record_array['select_if_duplicate']) ? $record_array['select_if_duplicate'] : 0;
+      $default_data[$priority]['update_if_duplicate'] = ($record_array['update_if_duplicate']) ? $record_array['update_if_duplicate'] : 0;
       $default_data[$priority]['disabled'] = ($record_array['disable']) ? $record_array['disable'] : 0;
       $default_data[$priority]['optional'] = ($record_array['optional']) ? $record_array['optional'] : 0;
+      $default_data[$priority]['select_optional'] = ($record_array['select_optional']) ? $record_array['select_optional'] : 0;
       $default_data[$priority]['record_id'] = $record_array['record_id'];
       $record2priority[$record_array['record_id']] = $priority;
       $default_data[$priority]['required'][$field_array['field']] = $field_array['required'];
@@ -498,7 +500,7 @@ function process_data_array_for_line($priority, &$data, &$default_data, $addt) {
       if (!isset($values[$field]) or (is_array($values[$field]) and count($values[$field]) == 0)){
         // check if the record is optional.  For backwards compatiblity we need to
         // check if the 'mode' is set to 'optional'
-        if ($table_data['optional'] or preg_match('/optional/', $table_data['mode'])) {
+        if ($table_data['optional'] or preg_match('/optional/', $table_data['mode']) or $table_data['select_optional'])  {
           $skip_optional = 1;
           // set the values array to be empty since we all required fields are
           // optional and we can't do a select/insert so we don't want to keep
@@ -546,18 +548,39 @@ function process_data_array_for_line($priority, &$data, &$default_data, $addt) {
   // if "select if duplicate" is enabled then check to ensure unique constraint is not violoated.  
   // If it is violoated then simply return, the record already exists in the database.
   // We check for insert_unique for backwards compatibilty but that mode no longer exists
-  if (preg_match('/insert_unique/', $table_data['mode']) or
+  if (preg_match('/insert_unique/', $table_data['mode']) or 
       $table_data['select_if_duplicate'] == 1) {
-    $unique = tripal_core_chado_select($table, array_keys($table_desc['fields']), $values, array('has_record' => TRUE));
-    //print 'Unique?'.print_r(array('table' => $table, 'columns' => array_keys($table_desc['fields']), 'values' => $values),TRUE).' returns '.$unique."\n";
-    if ($unique > 0) {
-      //$default_data[$priority]['inserted'] = TRUE;
-      //watchdog('T_bulk_loader', $header.': Not unique ('.$unique.'):'.print_r($values,'values')."\n".print_r($data,TRUE),array(),WATCHDOG_NOTICE);;
+    $options = array('use_unique' => TRUE);
+    $duplicate = tripal_core_chado_select($table, array_keys($table_desc['fields']), $values, $options); 
+    // if this is a duplicate then substitute the values in the table_data array so
+    // that for future records that my depend on this one, they can get the values 
+    // needed
+    if (count($duplicate) > 0) {
+
+      // if all we have is one field then we will just use the value returned
+      // rather than create an array of values. This way it will prevent 
+      // the tripal_core_chado_(select|insert|update) from recursing on 
+      // foreign keys and make the loader go faster.
+      if (count($duplicate[0]) == 1) {
+        foreach($duplicate[0] as $key => $value){ 
+          $data[$priority]['values_array'] = $value;
+        } 
+      }
+      // if we have multiple fields returned then we need to set the values
+      // the new array.
+      else {
+        // convert object to array
+        $new_values = array();
+        foreach($duplicate[0] as $key => $value){ 
+          $new_values[$key] = $value;
+        } 
+        $data[$priority]['values_array'] = $new_values;        
+      }
+      // reset values in data array
       return $no_errors;
     }
   }
 
-
   if (!preg_match('/select/', $table_data['mode'])) {
     // Use prepared statement?
     if (variable_get('tripal_bulk_loader_prepare', TRUE)) {
@@ -575,7 +598,13 @@ function process_data_array_for_line($priority, &$data, &$default_data, $addt) {
       $options['skip_validation'] = TRUE;
     }
 
-    $record = tripal_core_chado_insert($table, $values, $options);
+    if ($table_data['update_if_duplicate'] == 1) {
+      // TODO: handle updates
+      //$record = tripal_core_chado_update($table, $values, $options);
+    }
+    else {
+      $record = tripal_core_chado_insert($table, $values, $options);      
+    }
 
     if (!$record) {
       $msg = "\nLine " . $addt->line_num . ' ' . $table_data['record_id'] . ' (' . $table_data['mode'] . ') Unable to insert record into ' . $table . ' where values:' . print_r($values, TRUE);
@@ -585,9 +614,18 @@ function process_data_array_for_line($priority, &$data, &$default_data, $addt) {
       $no_errors = FALSE;
     }
     else {
-      //add changes back to values array
-      $data[$priority]['values_array'] = $record;
-      $values = $record;
+      // substitute the values array for the primary key if it exists
+      // and is a single field
+      if(array_key_exists('primary key',$table_desc)){
+        if(count($table_desc['primary key']) == 1){
+          $pkey_field = $table_desc['primary key'][0];
+          $data[$priority]['values_array'] = $record[$pkey_field];
+        }
+      } else {
+        //add changes back to values array
+        $data[$priority]['values_array'] = $record;
+        $values = $record;
+      }
 
       // if mode=insert_once then ensure we only insert it once
       if (preg_match('/insert_once/', $table_data['mode'])) {
@@ -623,11 +661,27 @@ function process_data_array_for_line($priority, &$data, &$default_data, $addt) {
     } //end of if insert was successful
   }
   else {
-    $exists = tripal_core_chado_select($table, array_keys($table_desc['fields']), $values, array('has_record' => TRUE));
+    // get the matches for this select
+    $matches = tripal_core_chado_select($table, array_keys($table_desc['fields']), $values);
+    
     // if the record doesn't exists and it's not optional then generate an error
-    if (!$exists and $table_data['optional'] != 1) {
+    if (count($matches) == 0){
       // No record on select
-      $msg = "\nLine " . $addt->line_num . ' ' . $table_data['record_id'] . ' (' . $table_data['mode'] . ') No Matching record in ' . $table . ' where values:' . print_r($values, TRUE);
+      if ($table_data['select_optional'] != 1) {        
+        $msg = "\nLine " . $addt->line_num . ' ' . $table_data['record_id'] . ' (' . $table_data['mode'] . ') No Matching record in ' . $table . ' where values:' . print_r($values, TRUE);
+        watchdog('T_bulk_loader', $msg, array(), WATCHDOG_WARNING);
+        $data[$priority]['error'] = TRUE;
+        $no_errors = FALSE;
+      }
+      // there is no match and select optional is turned on, so we want to set
+      // the values to empty for any records with an FK relationship on this one
+      else {
+        $data[$priority]['values_array'] = NULL;
+      }
+    }
+    // if we have more than one record matching then this isn't good. We have to error out.
+    if(count($matches) > 1){
+      $msg = "\nLine " . $addt->line_num . ' ' . $table_data['record_id'] . ' (' . $table_data['mode'] . ') Too many matching records in ' . $table . ' where values:' . print_r($values, TRUE);
       watchdog('T_bulk_loader', $msg, array(), WATCHDOG_WARNING);
       $data[$priority]['error'] = TRUE;
       $no_errors = FALSE;

+ 63 - 0
tripal_core/api/tripal_core.api.inc

@@ -842,6 +842,16 @@ function tripal_core_chado_delete($table, $match) {
  *     default if the statement is not prepared it will be automatically.
  *     However to avoid this check, which requires a database query you can
  *     set this value to true and the check will not be performed.
+ *  - use_unique: TRUE or FALSE.  This will alter the values array provided
+ *     to the function and remove fields that are not part of
+ *     a unique constraint.  This is useful before an insert to check whether
+ *     another record exists with the same unique constraint values (but possibly
+ *     different values in the other fields), which would cause a duplicate insert
+ *     error.  If a field in the unique constraint is missing from the values, its
+ *     default from the table description is used; if it has no default, the
+ *     function returns FALSE.  If the table has a primary key, the resulting
+ *     record will only have the pkey field. If there is no primary key the
+ *     resulting record will have the unique constraint fields.
+ *     
  *
  * @return
  *  A database query result resource, FALSE if the query was not executed
@@ -910,6 +920,9 @@ function tripal_core_chado_select($table, $columns, $values, $options = NULL) {
   if (!array_key_exists('statement_name',$options)) {
     $options['statement_name'] = FALSE;
   }
+  if (!array_key_exists('use_unique',$options)) {
+    $options['use_unique'] = FALSE;
+  }
 
   // if this is a prepared statement check to see if it has already been prepared
   if ($options['statement_name']) {
@@ -940,6 +953,56 @@ function tripal_core_chado_select($table, $columns, $values, $options = NULL) {
   $from = '';
   $where = '';
   $args = array();
+  
+  // if the 'use_unique' option is turned on then we want
+  // to remove all but unique keys 
+  if ($options['use_unique'] and array_key_exists('unique keys', $table_desc)) {
+    $ukeys = $table_desc['unique keys'];
+    $new_values = array();
+    $new_columns = array();
+    $uq_sname = "uq_" . $table . "_";
+    
+    // include the primary key in the results returned
+    $has_pkey = 0;
+    if (array_key_exists('primary key', $table_desc)){    
+      $has_pkey = 1;
+      $pkeys = $table_desc['primary key'];
+      foreach ($pkeys as $index => $key) {
+        array_push($new_columns, $key);
+      }
+    }
+    // iterate through the unique fields and reset the values and columns
+    // arrays to only include these fields
+    foreach ($ukeys as $cname => $fields) {
+      foreach ($fields as $field) {
+        if (array_key_exists($field, $values)) {
+          $new_values[$field] = $values[$field];
+          $uq_sname .= substr($field, 0, 2);
+          // if there is no primary key then use the unique contraint fields
+          if (!$has_pkey) {
+            array_push($new_columns,$field);
+          }
+        } 
+        elseif (array_key_exists('default',$table_desc['fields'][$field])) {
+          $new_values[$field] = $table_desc['fields'][$field]['default'];
+          $uq_sname .= substr($field, 0, 2);
+          if (!$has_pkey){
+            array_push($new_columns,$field);
+          }
+        }
+        else {
+          return FALSE;
+        }
+      }
+    }
+    // reset the values and columns
+    $values = $new_values;
+    $columns = $new_columns;
+    
+    // set the new statement name
+    $options['statement_name'] = $uq_sname;  
+  }
+
 
   foreach ($values as $field => $value) {
     $select[] = $field;