tripal_bulk_loader.loader.inc 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. <?php
  /**
   * Form builder: Add Loader Job Form
   *
   * Meant to be embedded on the bulk loader node page so that users can
   * submit (or re-submit) a loading job for that node.
   *
   * @param $form_state
   *   The current form state (not read by this builder).
   * @param $node
   *   The bulk loader node; its nid and file properties are copied into
   *   hidden form elements so the submit handler can read them.
   *
   * @return
   *   The form array: two hidden elements (nid, file) and a submit button.
   */
  function tripal_bulk_loader_add_loader_job_form ($form_state, $node) {
    // Values carried along for the submit handler.
    $hidden_nid = array(
      '#type' => 'hidden',
      '#value' => $node->nid,
    );
    $hidden_file = array(
      '#type' => 'hidden',
      '#value' => $node->file,
    );

    // The button label is matched by the submit handler, so it is kept as-is.
    $button = array(
      '#type' => 'submit',
      '#value' => 'Submit Job',
    );

    return array(
      'nid' => $hidden_nid,
      'file' => $hidden_file,
      'submit' => $button,
    );
  }
  /**
   * Add Loader Job Form (Submit)
   *
   * When the "Submit Job" button was pressed and the data file is readable, a
   * Tripal job is registered that will call tripal_bulk_loader_load_data() for
   * the node. When the file can not be read, no job is scheduled and an error
   * message is shown to the user instead.
   *
   * @param $form
   *   The form array (not used).
   * @param $form_state
   *   The submitted form state; reads the 'op', 'nid' and 'file' values.
   */
  function tripal_bulk_loader_add_loader_job_form_submit ($form, $form_state) {
    global $user;

    // Only react to our own button.
    if (!preg_match('/Submit Job/', $form_state['values']['op'])) {
      return;
    }

    $file = $form_state['values']['file'];
    if (!is_readable($file)) {
      // Report the failure as an error rather than as a regular status message.
      drupal_set_message("Can not open ".$file.". Job not scheduled.", 'error');
      return;
    }

    // Submit Tripal Job. The argument array is keyed from 1 exactly as before
    // (presumably what tripal_add_job() expects -- verify against its docs).
    $job_args = array(1 => $form_state['values']['nid']);
    $fname = basename($file);
    $job_id = tripal_add_job("Bulk Loading Job: $fname", 'tripal_bulk_loader', 'tripal_bulk_loader_load_data', $job_args, $user->uid);
    // TODO: add job_id to bulk_loader node
  }
  /**
   * Tripal Bulk Loader
   *
   * This is the function that's run by tripal_launch_jobs to bulk load chado data.
   *
   * The template attached to the node is first compiled into one array of
   * default values per record (constants, empty placeholders for spreadsheet
   * columns and references to foreign records). The tab-delimited file is then
   * read line by line; for every line each record is completed, looked up with
   * tripal_core_chado_select() and, when no matching record is found, inserted
   * with tripal_core_chado_insert().
   *
   * @param $nid
   *   The Node ID of the bulk loading job node to be loaded. All other needed
   *   data is expected to be in the node (ie: template and file).
   *
   * Note: Nothing is returned. Progress is printed, and records that already
   * exist or fail to insert are logged through watchdog(). This function does
   * not itself write to any status or error table.
   */
  function tripal_bulk_loader_load_data($nid) {
    $node = node_load($nid);
    if (!$node) {
      print "ERROR: Unable to load bulk loader node " . $nid . ".\n";
      return;
    }

    print "Template: ".$node->template->name." (".$node->template_id.")\n";
    print "File: ".$node->file."\n";

    // Prep Work ==================================================================================
    // Generate default values array
    $default_data = array();
    $field2column = array();
    $record2priority = array();
    foreach ($node->template->template_array as $priority => $record_array) {
      // Entries that are not records, or records without fields, contribute nothing.
      if (!is_array($record_array) || empty($record_array['fields']) || !is_array($record_array['fields'])) {
        continue;
      }

      $default_data[$priority] = array(
        'table' => $record_array['table'],
        'values_array' => array(),
        'need_further_processing' => FALSE,
      );
      $field2column[$priority] = array();
      $record2priority[$record_array['record_id']] = $priority;

      foreach ($record_array['fields'] as $field_array) {
        $field = $field_array['field'];
        if (preg_match('/table field/', $field_array['type'])) {
          // Filled in per line from the mapped spreadsheet column.
          $default_data[$priority]['values_array'][$field] = '';
          $default_data[$priority]['need_further_processing'] = TRUE;
          $field2column[$priority][$field] = $field_array['spreadsheet column'];
        }
        elseif (preg_match('/constant/', $field_array['type'])) {
          $default_data[$priority]['values_array'][$field] = $field_array['constant value'];
        }
        elseif (preg_match('/foreign key/', $field_array['type'])) {
          // Replaced per line by the values array of the referenced record.
          $default_data[$priority]['values_array'][$field] = array(
            'foreign record' => $field_array['foreign key'],
          );
          $default_data[$priority]['need_further_processing'] = TRUE;
        }
        else {
          print 'WARNING: Unsupported type: '. $field_array['type'] . ' for ' . $record_array['table'] . '.' . $field_array['field']."!\n";
        }
      } // end of foreach field
    } //end of foreach record

    // Parse File adding records as we go ========================================================
    $file_handle = is_readable($node->file) ? fopen($node->file, 'r') : FALSE;
    if (!$file_handle) {
      // Without this check feof() on a failed handle would never become TRUE.
      print "ERROR: Can not open " . $node->file . ". No data loaded.\n";
      watchdog('T_bulk_loader',
        'Unable to open %file for reading.',
        array('%file' => $node->file),
        WATCHDOG_ERROR
      );
      return;
    }

    // Skip the header line when the node says the file has one.
    if (preg_match('/(t|true|1)/', $node->file_has_header)) {
      fgets($file_handle);
    }

    $num_records = 0;
    $num_lines = 0;
    $num_errors = 0;
    // No length limit: a long line must not be split into several bogus records.
    while (($raw_line = fgets($file_handle)) !== FALSE) {
      // Strip only the line ending. Trimming tabs would drop empty leading
      // cells and shift every following column.
      $raw_line = rtrim($raw_line, "\r\n");
      if (trim($raw_line) === '') {
        continue;
      }
      $line = explode("\t", $raw_line);
      $num_lines++;

      // $data collects the completed values of this line so that later
      // records can pull in the values of the records they reference.
      $data = $default_data;
      foreach ($default_data as $priority => $table_data) {
        $table = $table_data['table'];
        $values = $table_data['values_array'];
        if (!$values) {
          // Every field of this record had an unsupported type.
          continue;
        }

        if ($table_data['need_further_processing']) {
          $values = tripal_bulk_loader_add_spreadsheetdata_to_values($values, $line, $field2column[$priority]);
          $values = tripal_bulk_loader_add_foreignkey_to_values($values, $data, $record2priority);
        }

        // add new values array into the data array
        $data[$priority]['values_array'] = $values;

        // first check if it already exists
        $exists = tripal_core_chado_select($table, array_keys($values), $values, array('has_record'=>TRUE));
        if ($exists) {
          watchdog('T_bulk_loader',
            'Record already exists in %table: %record',
            array('%table' => $table, '%record' => implode(', ', tripal_bulk_loader_flatten_array($values))),
            WATCHDOG_WARNING
          );
          continue;
        }

        // if it doesn't exist already then insert it
        if (tripal_core_chado_insert($table, $values)) {
          $num_records++;
        }
        else {
          $num_errors++;
          watchdog('T_bulk_loader',
            'Unable to insert the following record into %table: %record',
            array('%table' => $table, '%record' => implode(', ', tripal_bulk_loader_flatten_array($values))),
            WATCHDOG_ERROR
          );
        }
      } // end of foreach record of the template
    } //end of foreach line of file
    fclose($file_handle);

    print "Lines processed: " . $num_lines . "; records inserted: " . $num_records . "; failed inserts: " . $num_errors . "\n";
  }
  /**
   * This function adds the file data to the values array
   *
   * Only fields that have an entry in $field2column are touched. Fields whose
   * value is an array (foreign key references) and fields without a column
   * mapping (constants) are returned unchanged.
   *
   * @param $values
   *   The default values array -contains all constants
   * @param $line
   *   An array of values for the current line (0-indexed)
   * @param $field2column
   *   An array mapping values fields to line columns; column numbers are
   *   1-based
   * @return
   *   Supplemented values array
   */
  function tripal_bulk_loader_add_spreadsheetdata_to_values ($values, $line, $field2column) {
    if (!is_array($field2column)) {
      $field2column = array();
    }

    foreach ($values as $field => $value) {
      // Foreign key references are resolved elsewhere.
      if (is_array($value)) { continue; }

      // No column mapped: this is a constant and must not be overwritten
      // (previously a constant such as '0' was replaced by NULL).
      if (!isset($field2column[$field])) { continue; }

      // Template columns are 1-based, the line array is 0-based.
      $column = $field2column[$field] - 1;
      $cell = isset($line[$column]) ? $line[$column] : NULL;

      // Take the cell when it holds data ('0' counts as data), or when there
      // is no default worth keeping.
      $has_data = ($cell !== NULL && $cell !== '');
      if ($has_data OR !$value) {
        $values[$field] = $cell;
      }
    }

    return $values;
  }
  /**
   * Handles foreign keys in the values array.
   *
   * A field whose value is an array is taken to be a reference: its
   * 'foreign record' entry names another record. That name is translated to a
   * priority through $record2priority and the field is replaced by the values
   * array stored for that priority in $data. All other fields are left alone.
   *
   * @param $values
   *   The values array of the record being built.
   * @param $data
   *   The per-priority data array; $data[priority]['values_array'] is read.
   * @param $record2priority
   *   An array mapping record names to their priority (the key into $data).
   * @return
   *   The values array with foreign record references substituted.
   */
  function tripal_bulk_loader_add_foreignkey_to_values($values, $data, $record2priority) {
    foreach (array_keys($values) as $name) {
      if (!is_array($values[$name])) {
        continue;
      }

      // Record name -> priority -> values array of the referenced record.
      $target_priority = $record2priority[$values[$name]['foreign record']];
      $values[$name] = $data[$target_priority]['values_array'];
    }

    return $values;
  }
  /**
   * Flattens an array up to two levels
   * Used for printing of arrays without taking up much space
   *
   * Every entry becomes a "key=>value" string. Values longer than 20
   * characters are cut to 20 and suffixed with '...'. A value that is itself an
   * array is rendered as "{k=>v,k=>v}" with the same shortening applied to each
   * inner value.
   *
   * @param $values
   *   The array to flatten.
   * @return
   *   A list of "key=>value" strings, one per top-level entry.
   */
  function tripal_bulk_loader_flatten_array ($values) {
    $limit = 20;
    $pairs = array();

    foreach ($values as $key => $item) {
      if (!is_array($item)) {
        $text = (strlen($item) > $limit) ? substr($item, 0, $limit) . '...' : $item;
      }
      else {
        // Second level: shorten each inner value, then wrap the lot in braces.
        $inner = array();
        foreach ($item as $inner_key => $inner_item) {
          $shown = (strlen($inner_item) > $limit) ? substr($inner_item, 0, $limit) . '...' : $inner_item;
          $inner[] = $inner_key .'=>'. $shown;
        }
        $text = '{'. implode(',', $inner) .'}';
      }

      $pairs[] = $key .'=>'. $text;
    }

    return $pairs;
  }
  192. }