Browse Source

Tripal: Bulk Loader - Added ability to use regex to transform field values

laceysanderson 14 years ago
parent
commit
ac255baae0

+ 95 - 18
tripal_bulk_loader/tripal_bulk_loader.admin.inc

@@ -1011,6 +1011,49 @@ function tripal_bulk_loader_add_template_field_form (&$form_state = NULL) {
     ),
   );
 
+  $form['add_fields']['additional'] = array(
+    '#type' => 'fieldset',
+    '#title' => 'Additional Options',
+  );
+
+  $form['add_fields']['additional']['required'] = array(
+    '#type' => 'checkbox',
+    '#title' => 'Make this file required',
+    '#default_value' => $template_field['required'],
+  );
+  
+  $form['add_fields']['additional']['regex_transform'] = array(
+    '#type' => 'fieldset',
+    '#title' => 'Transform Spreadsheet Value Rule'
+  );
+  
+  $form['add_fields']['additional']['regex_transform']['transform'] = array(
+    '#type' => 'checkbox',
+    '#title' => 'Apply transformation to this field'
+  );
+  
+  $form['add_fields']['additional']['regex_transform']['pattern'] = array(
+    '#type' => 'textfield',
+    '#title' => 'Match Pattern',
+    '#description' => 'You can use standard php regular expressions in this field to specify a '
+      .'pattern. Only if this pattern matches the value in the spreadsheet does the replacement '
+      .'pattern get applied to the value. To capture a section of your value for use in the '
+      .'replacement patten surround with round brackets. For example, <i>GI:(\d+)</i> will match '
+      .' NCBI gi numbers and will capture the numerical digits for use in the replacement pattern. '
+      .' To match and capture any value use <i>.*</i>',
+    '#default_value' => ($template_field['regex']['pattern']) ? $template_field['regex']['pattern'] : '^(.*)$',
+  );
+  
+  $form['add_fields']['additional']['regex_transform']['replace'] = array(
+    '#type' => 'textfield',
+    '#title' => 'Replacement Pattern',
+    '#description' => 'This pattern should contain the text you want to replace the match pattern '
+    .'mentioned above. It can include references of the form \n where n is the number of the '
+    .'capture in the match pattern. For example, \1 will be replaced with the text matched in your '
+    .'first set of round brackets.',
+    '#default_value' => ($template_field['regex']['replace']) ? $template_field['regex']['replace'] : '\1'
+  ); 
+  
   $form['add_fields']['submit-add_field'] = array(
       '#type' => 'submit',
       '#value' => 'Add Field'
@@ -1054,41 +1097,44 @@ function tripal_bulk_loader_add_template_field_form_submit ($form, &$form_state)
       
       // Add field to template array
       if ($form_state['values']['field_type'] == 'table field') {
-        $template[$priority]['fields'][] = array(
+        $field = array(
           'type' => 'table field',
           'title' => $form_state['values']['field_title'],
           'field' => $form_state['values']['chado_field'],
-          //'required' => <true|false>,
+          'required' => $form_state['values']['required'],
           //'allowed values' => empty by default,
           'spreadsheet sheet' => $form_state['values']['sheet_name'],
           'spreadsheet column' => $form_state['values']['column_number'],
-          //'exposed' => 'true|false'  If exposed, will give a select box first from allowed values if set, second from database if values not set.
-          //'mapping' => array(
-          //   'from' => 'to'
-          //   '/from re/' => 'to'
-          //),        
+          //'exposed' => 'true|false'  If exposed, will give a select box first from allowed values if set, second from database if values not set.     
         );
       } elseif ($form_state['values']['field_type'] == 'constant') {
-        $template[$priority]['fields'][] = array(
+        $field = array(
           'type' => 'constant',
           'title' => $form_state['values']['field_title'],
           'field' => $form_state['values']['chado_field'],
-          //'required' => <true|false>,
+          'required' => $form_state['values']['required'],
           //'allowed values' => empty by default,
           'constant value' => $form_state['values']['constant_value'],
           //'exposed' => 'true|false'  If exposed, will give a select box first from allowed values if set, second from database if values not set.     
         );      
       } elseif ($form_state['values']['field_type'] == 'foreign key') {
-        $template[$priority]['fields'][] = array(
+        $field = array(
           'type' => 'foreign key',
           'title' => $form_state['values']['field_title'],
           'field' => $form_state['values']['chado_field'],
           'foreign key' => $form_state['values']['foreign_record'],
-          //'required' => <true|false>,   
+          'required' => $form_state['values']['required'],
         );        
       }
 
+      // Deal with any additional options
+      if ($form_state['values']['transform']) {
+        $field['regex']['pattern'] = $form_state['values']['pattern'];
+        $field['regex']['replace'] = $form_state['values']['replace'];
+      }
+
       // Save Template
+      $template[$priority]['fields'][] = $field;
       $form_state['storage']['template']->template_array = serialize($template);
       $success = drupal_write_record('tripal_bulk_loader_template', $form_state['storage']['template'], array('template_id'));
   
@@ -1396,9 +1442,41 @@ function tripal_bulk_loader_edit_template_field_form (&$form_state = NULL) {
   $form['edit_fields']['additional']['required'] = array(
     '#type' => 'checkbox',
     '#title' => 'Make this file required',
-    '#default_value' => FALSE,
+    '#default_value' => $template_field['required'],
+  );
+  
+  $form['edit_fields']['additional']['regex_transform'] = array(
+    '#type' => 'fieldset',
+    '#title' => 'Transform Spreadsheet Value Rule'
+  );
+  
+  $form['edit_fields']['additional']['regex_transform']['transform'] = array(
+    '#type' => 'checkbox',
+    '#title' => 'Apply transformation to this field'
+  );
+  
+  $form['edit_fields']['additional']['regex_transform']['pattern'] = array(
+    '#type' => 'textfield',
+    '#title' => 'Match Pattern',
+    '#description' => 'You can use standard php regular expressions in this field to specify a '
+      .'pattern. Only if this pattern matches the value in the spreadsheet does the replacement '
+      .'pattern get applied to the value. To capture a section of your value for use in the '
+      .'replacement patten surround with round brackets. For example, <i>GI:(\d+)</i> will match '
+      .' NCBI gi numbers and will capture the numerical digits for use in the replacement pattern. '
+      .' To match and capture any value use <i>.*</i>',
+    '#default_value' => ($template_field['regex']['pattern']) ? $template_field['regex']['pattern'] : '^(.*)$',
   );
   
+  $form['edit_fields']['additional']['regex_transform']['replace'] = array(
+    '#type' => 'textfield',
+    '#title' => 'Replacement Pattern',
+    '#description' => 'This pattern should contain the text you want to replace the match pattern '
+    .'mentioned above. It can include references of the form \n where n is the number of the '
+    .'capture in the match pattern. For example, \1 will be replaced with the text matched in your '
+    .'first set of round brackets.',
+    '#default_value' => ($template_field['regex']['replace']) ? $template_field['regex']['replace'] : '\1'
+  );  
+  
   $form['edit_fields']['submit-edit_field'] = array(
       '#type' => 'submit',
       '#value' => 'Edit Field'
@@ -1452,11 +1530,7 @@ function tripal_bulk_loader_edit_template_field_form_submit ($form, &$form_state
           //'allowed values' => empty by default,
           'spreadsheet sheet' => $form_state['values']['sheet_name'],
           'spreadsheet column' => $form_state['values']['column_number'],
-          //'exposed' => 'true|false'  If exposed, will give a select box first from allowed values if set, second from database if values not set.
-          //'mapping' => array(
-          //   'from' => 'to'
-          //   '/from re/' => 'to'
-          //),        
+          //'exposed' => 'true|false'  If exposed, will give a select box first from allowed values if set, second from database if values not set.  
         );
       } elseif ($form_state['values']['field_type'] == 'constant') {
         $field = array(
@@ -1479,7 +1553,10 @@ function tripal_bulk_loader_edit_template_field_form_submit ($form, &$form_state
       }
 
       // Deal with any additional options
-      dpm($form_state['values'], 'values');
+      if ($form_state['values']['transform']) {
+        $field['regex']['pattern'] = $form_state['values']['pattern'];
+        $field['regex']['replace'] = $form_state['values']['replace'];
+      }
       
       // if the record has changed...
       $form_state['storage']['template_array'][$priority]['table'] = $form_state['values']['chado_table'];

+ 33 - 6
tripal_bulk_loader/tripal_bulk_loader.loader.inc

@@ -95,7 +95,11 @@ function tripal_bulk_loader_load_data($nid) {
       $default_data[$priority]['record_id'] = $record_array['record_id'];
       $record2priority[$record_array['record_id']] = $priority;
       $default_data[$priority]['required'][$field_array['field']] = $field_array['required'];
-        
+      
+      if (isset($field_array['regex'])) {
+        $default_data[$priority]['regex_transform'][$field_array['field']] = $field_array['regex'];
+      }
+      
       if (preg_match('/table field/', $field_array['type'])) {
         $default_data[$priority]['values_array'][$field_array['field']] = '';
         $default_data[$priority]['need_further_processing'] = TRUE;
@@ -141,7 +145,8 @@ function tripal_bulk_loader_load_data($nid) {
         $values = tripal_bulk_loader_add_spreadsheetdata_to_values ($values, $line, $field2column[$priority]);
         $values = tripal_bulk_loader_add_foreignkey_to_values($values, $data, $record2priority);
       }
-      
+      $values = tripal_bulk_loader_regex_tranform_values($values, $table_data);
+
       if (!$values) {
         $msg = $table_data['record_id'].' ('.$table_data['mode'].') Aborted due to error in previous record.';
         watchdog('T_bulk_loader', $msg, array(), WATCHDOG_WARNING); 
@@ -268,11 +273,12 @@ function tripal_bulk_loader_add_foreignkey_to_values($values, $data, $record2pri
       $foreign_priority = $record2priority[$foreign_record];
       $foreign_values = $data[$foreign_priority]['values_array'];
       
-      //add to current values array
-      if (!$data[$foreign_priority]['error']) {
-        $values[$field] = $foreign_values;
-      } else {
+      // add to current values array
+      // only if there was no error found when the foreign record was processed
+      if (isset($data[$foreign_priority]['error']) and $data[$foreign_priority]['error']) {
         return FALSE;
+      } else {
+        $values[$field] = $foreign_values;
       }
     }
   }
@@ -280,6 +286,27 @@ function tripal_bulk_loader_add_foreignkey_to_values($values, $data, $record2pri
   return $values;
 }
 
+/**
+ * Uses a supplied regex to transform spreadsheet values
+ *
+ * For every field listed under $table_data['regex_transform'] the current
+ * value in $values is passed through preg_replace(), using the stored match
+ * pattern (wrapped in '/' delimiters) and the stored replacement pattern.
+ *
+ * @param $values
+ *   The select/insert values array for the given table. If this is not an
+ *   array (e.g. FALSE because an earlier step aborted the record) it is
+ *   returned untouched so the caller can still detect the failure.
+ * @param $table_data
+ *   The data array for the given table. Its optional 'regex_transform' entry
+ *   maps a field name to array('pattern' => ..., 'replace' => ...).
+ *
+ * @return
+ *   The values array with all applicable transformations applied, or the
+ *   unchanged $values if there was nothing to transform.
+ */
+function tripal_bulk_loader_regex_tranform_values ($values, $table_data) {
+
+  // Writing a key into FALSE would silently turn it into an array and hide
+  // the error reported by a previous processing step, so pass it through.
+  if (!is_array($values)) {
+    return $values;
+  }
+
+  // Only tables with at least one regex-enabled field carry this entry
+  if (!isset($table_data['regex_transform']) or !is_array($table_data['regex_transform'])) {
+    return $values;
+  }
+
+  foreach ($table_data['regex_transform'] as $field => $regex_array) {
+    if (!is_array($regex_array)) { continue; }
+    if (!isset($regex_array['pattern']) or !isset($regex_array['replace'])) { continue; }
+
+    // Do not create an (empty) value for a field this record does not have
+    if (!array_key_exists($field, $values)) { continue; }
+
+    $old_value = $values[$field];
+    $new_value = preg_replace('/'.$regex_array['pattern'].'/', $regex_array['replace'], $old_value);
+
+    // preg_replace() returns NULL on error (e.g. an invalid pattern);
+    // keep the original value rather than wiping it out.
+    if ($new_value !== NULL) {
+      $values[$field] = $new_value;
+    }
+  }
+
+  return $values;
+}
+
 /**
  * Flattens an array up to two levels
  * Used for printing of arrays without taking up much space