tripal_bulk_loader.loader.inc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. <?php
  2. /**
  3. * Add Loader Job Form
  4. *
  5. * This form is meant to be included on the node page to allow users to submit/re-submit
  6. * loading jobs
  7. */
  8. function tripal_bulk_loader_add_loader_job_form ($form_state, $node) {
  9. $form = array();
  10. $form['nid'] = array(
  11. '#type' => 'hidden',
  12. '#value' => $node->nid,
  13. );
  14. $form['file'] = array(
  15. '#type' => 'hidden',
  16. '#value' => $node->file
  17. );
  18. $form['submit'] = array(
  19. '#type' => 'submit',
  20. '#value' => 'Submit Job'
  21. );
  22. return $form;
  23. }
  24. /**
  25. * Add Loader Job Form (Submit)
  26. */
  27. function tripal_bulk_loader_add_loader_job_form_submit ($form, $form_state) {
  28. global $user;
  29. if (preg_match('/Submit Job/', $form_state['values']['op'])) {
  30. //Submit Tripal Job
  31. $job_args[1] = $form_state['values']['nid'];
  32. if (is_readable($form_state['values']['file'])) {
  33. $fname = basename($form_state['values']['file']);
  34. tripal_add_job("Bulk Loading Job: $fname",'tripal_bulk_loader', 'tripal_bulk_loader_load_data', $job_args, $user->uid);
  35. } else {
  36. drupal_set_message("Can not open ".$form_state['values']['file'].". Job not scheduled.");
  37. }
  38. }
  39. }
  40. /**
  41. * Tripal Bulk Loader
  42. *
  43. * This is the function that's run by tripal_launch_jobs to bulk load chado data.
  44. * @param $nid
  45. * The Node ID of the bulk loading job node to be loaded. All other needed data is expected to be
  46. * in the node (ie: template ID and file)
  47. *
  48. * Note: Instead of returning a value this function updates the tripal_bulk_loader.status and
  49. * Enters errors into tripal_bulk_loader_errors if they are encountered.
  50. */
  51. function tripal_bulk_loader_load_data($nid) {
  52. $node = node_load($nid);
  53. print "Template: ".$node->template->name." (".$node->template_id.")\n";
  54. print "File: ".$node->file."\n";
  55. // Prep Work ==================================================================================
  56. // get base table and it's table_description array
  57. $base_table = $node->template->template_array['module'];
  58. $base_table_desc = module_invoke_all('chado_'.$base_table.'_schema');
  59. $related_tables = module_invoke_all('tripal_bulk_loader_'.$base_table.'_related_tables');
  60. //print "Base Table: ".print_r($base_table,TRUE)."\nTable DEscription: ".print_r($base_table_desc, TRUE)."\nTemplate array: ".print_r($node->template->template_array,TRUE)."\n";
  61. // get a default values array to be passed into tripal_core_chado_insert
  62. // and get a mapping between table.field and spreadsheet column
  63. // also determine relationship between each table and base table
  64. $default_values_array = array();
  65. $field2column = array();
  66. $relationships2base = array();
  67. $all_tables = array();
  68. foreach ($node->template->template_array as $table => $table_array) {
  69. if (is_array($table_array)) {
  70. $all_tables[$table] = $table;
  71. foreach ($table_array['field'] as $field_array) {
  72. if (preg_match('/table field/', $field_array['type'])) {
  73. $default_values_array[$table][$field_array['field']] = '';
  74. $field2column[$table][$field_array['field']] = $field_array['spreadsheet column'];
  75. } elseif (preg_match('/constant/', $field_array['type'])) {
  76. $default_values_array[$table][$field_array['field']] = $field_array['constant value'];
  77. } else {
  78. print 'WARNING: Unsupported type: '. $field_array['type'] . ' for ' . $table . '.' . $field_array['field']."!\n";
  79. }
  80. }
  81. // Determine what relation is between this table and the base table-----------------------
  82. // This is used later to link the various records
  83. //print "===\n";
  84. // Is there a foreign key to this table in the base table?
  85. // ie: feature.type_id (base) for cvterm (current)
  86. //print "A) Is there fkey to ".$table." table in ".$base_table."?\n";
  87. if ($related_tables['Foreign Key Relations'][$table]) {
  88. //print "YES!\n";
  89. $relationships2base['foreign key'][$table] = $base_table_desc['foreign keys'][$table];
  90. }
  91. // Is there a foreign key in this table to the base table?
  92. // ie: featureloc.feature_id (current) for feature (base)
  93. //print "B) Does ".$table." contain a fkey to ".$base_table."?\n";
  94. if ($related_tables['Direct Relations'][$table]) {
  95. $table_desc = module_invoke_all('chado_'.$table.'_schema');
  96. //print "YES!\n";
  97. $relationships2base['direct'][$table] = $table_desc['foreign keys'][$base_table];
  98. }
  99. // Is there a linking table which links this table with the base table
  100. // ie: analysisfeature (current) links analysis and feature (base)
  101. //print "C) Is ".$table." a linking table?\n";
  102. if ($related_tables['Indirect Relations'][$table]) {
  103. //print "YES!\n";
  104. $table_desc = module_invoke_all('chado_'.$table.'_schema');
  105. $relationships2base['indirect'][$table] = $table_desc['foreign keys'];
  106. }
  107. }
  108. }
  109. //print "\nDefault Values Array: ".print_r($default_values_array, TRUE)."\n";
  110. //print "\nField to Column Mapping: ".print_r($field2column, TRUE)."\n";
  111. //print "\nRelationships to Base Table: ".print_r($relationships2base, TRUE)."\n";
  112. // Parse File adding records as we go ========================================================
  113. $file_handle = fopen($node->file, 'r');
  114. if (preg_match('/(t|true|1)/', $node->file_has_header)) { fgets($file_handle, 4096); }
  115. $num_records = 0;
  116. $num_lines = 0;
  117. $num_errors = 0;
  118. while (!feof($file_handle)) {
  119. $line = array();
  120. $raw_line = fgets($file_handle, 4096);
  121. $line = preg_split("/\t/", $raw_line);
  122. $num_lines++;
  123. // Contains constants set above
  124. $tables = $all_tables;
  125. $values = $default_values_array;
  126. // Insert base record-----------------------------------------------------------------------
  127. foreach ($relationships2base['foreign key'] as $table => $desc) {
  128. // Ensure foreign key is present
  129. $values[$table] = tripal_bulk_loader_supplement_values_array ($values[$table], $line, $field2column[$table]);
  130. $success = tripal_bulk_loader_ensure_record ($table, $values[$table]);
  131. if ($success) {
  132. $num_records++;
  133. } else {
  134. $num_errors++;
  135. }
  136. //Add to base values array
  137. foreach ($desc['columns'] as $foreign_key => $primary_key) {
  138. $values[$base_table][$foreign_key] = $values[$table];
  139. }
  140. unset($tables[$table]);
  141. }
  142. // base table
  143. $values[$base_table] = tripal_bulk_loader_supplement_values_array ($values[$base_table], $line, $field2column[$base_table]);
  144. $success = tripal_bulk_loader_ensure_record ($base_table, $values[$base_table]);
  145. //print 'Base Table values array: '.print_r($values[$base_table], TRUE)."\n";
  146. if (!$success) {
  147. print "ERROR: Unable to insert base record where base table: ".$base_table.
  148. " and values array: ".tripal_bulk_loader_flatten_array($values[$base_table])."\n";
  149. $num_errors++;
  150. } else {
  151. $num_records++;
  152. }
  153. unset($tables[$base_table]);
  154. // Insert all tables with direct relationship to base ----------------------------------------
  155. foreach ($relationships2base['direct'] as $table => $desc) {
  156. //Add base to values array
  157. foreach ($desc['columns'] as $foreign_key => $primary_key) {
  158. $values[$table][$foreign_key] = $values[$base_table];
  159. }
  160. // Supplement and Add record
  161. $values[$table] = tripal_bulk_loader_supplement_values_array ($values[$table], $line, $field2column[$table]);
  162. $success = tripal_bulk_loader_ensure_record ($table, $values[$table]);
  163. if ($success) {
  164. $num_records++;
  165. } else {
  166. $num_errors++;
  167. }
  168. unset($tables[$table]);
  169. }
  170. // Add in all other tables -----------------------------------------------------------------
  171. foreach ($tables as $table) {
  172. // Don't insert if its an indirect relationship linking table
  173. if (!$relationships2base['indirect'][$table]) {
  174. // Supplement and Add record
  175. $values[$table] = tripal_bulk_loader_supplement_values_array ($values[$table], $line, $field2column[$table]);
  176. $success = tripal_bulk_loader_ensure_record ($table, $values[$table]);
  177. if ($success) {
  178. $num_records++;
  179. } else {
  180. $num_errors++;
  181. }
  182. }
  183. }
  184. // Add in indirect relationships -----------------------------------------------------------
  185. foreach ($relationships2base['indirect'] as $table => $desc) {
  186. // Add foreign keys to values array
  187. foreach ($desc as $subtable => $subdesc) {
  188. foreach ($subdesc['columns'] as $foreign_key => $primary_key) {
  189. $values[$table][$foreign_key] = $values[$subtable];
  190. }
  191. }
  192. // Supplement and Add record
  193. $values[$table] = tripal_bulk_loader_supplement_values_array ($values[$table], $line, $field2column[$table]);
  194. $success = tripal_bulk_loader_ensure_record ($table, $values[$table]);
  195. if ($success) {
  196. $num_records++;
  197. } else {
  198. $num_errors++;
  199. }
  200. }
  201. } //end of file
  202. print "\nNumber of Records Inserted:".$num_records."\n";
  203. print "Number of Lines in File:".$num_lines."\n";
  204. print "Number of Errors: ".$num_errors."\n\n";
  205. if (!$num_errors) {
  206. db_query("UPDATE {tripal_bulk_loader} SET job_status='%s' WHERE nid=%d", 'successfully loaded', $nid);
  207. } else {
  208. db_query("UPDATE {tripal_bulk_loader} SET job_status='%s' WHERE nid=%d", 'load attempted', $nid);
  209. }
  210. }
  211. /**
  212. * This function adds the file data to the values array
  213. *
  214. * @param $values
  215. * The default values array -contains all constants
  216. * @param $line
  217. * An array of values for the current line
  218. * @param $field2column
  219. * An array mapping values fields to line columns
  220. * @return
  221. * Supplemented values array
  222. */
  223. function tripal_bulk_loader_supplement_values_array ($values, $line, $field2column) {
  224. foreach ($values as $field => $value) {
  225. $column = $field2column[$field] - 1;
  226. if ($line[$column] OR (!$values[$field])) {
  227. $values[$field] = $line[$column];
  228. }
  229. }
  230. return $values;
  231. }
  232. /**
  233. * This function first ensures the record doesn't already exist and then inserts it
  234. *
  235. * @param $table
  236. * The table the record should be present in
  237. * @param $values
  238. * The values array used for selecting and/or inserting the record
  239. * @return
  240. * TRUE or FALSE based on presence or absence of record
  241. */
  242. function tripal_bulk_loader_ensure_record ($table, $values) {
  243. // get flattened values array for printing errors
  244. $flattened_values = tripal_bulk_loader_flatten_array($values);
  245. // check if record exists
  246. $has_record = tripal_core_chado_select($table, array_keys($values), $values, array('has_record' => TRUE));
  247. if ($has_record) {
  248. print "\tWARNING: Record already exists in $table where ".implode(',',$flattened_values).".\n";
  249. return true;
  250. } else {
  251. // if record doesn't exist then insert it
  252. $success = tripal_core_chado_insert($table, $values);
  253. if (!$success) {
  254. print "ERROR: Unable to insert the following record into $table: ".implode(',',$flattened_values)."\n";
  255. return false;
  256. } else {
  257. return true;
  258. }
  259. }
  260. }
  261. /**
  262. * Flattens an array up to two levels
  263. * Used for printing of arrays without taking up much space
  264. */
  265. function tripal_bulk_loader_flatten_array ($values) {
  266. $flattened_values = array();
  267. foreach ($values as $k => $v) {
  268. if (is_array($v)) {
  269. $vstr = array();
  270. foreach ($v as $vk => $vv) {
  271. if (strlen($vv) > 20) {
  272. $vstr[] = $vk .'=>'. substr($vv, 0, 20) . '...';
  273. } else {
  274. $vstr[] = $vk .'=>'. $vv;
  275. }
  276. }
  277. $v = '{'. implode(',',$vstr) .'}';
  278. } elseif (strlen($v) > 20) {
  279. $v = substr($v, 0, 20) . '...';
  280. }
  281. $flattened_values[] = $k .'=>'. $v;
  282. }
  283. return $flattened_values;
  284. }