123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341 |
- <?php
- function tripal_bulk_loader_add_loader_job_form ($form_state, $node) {
- $form = array();
-
- $form['nid'] = array(
- '#type' => 'hidden',
- '#value' => $node->nid,
- );
-
- $form['file'] = array(
- '#type' => 'hidden',
- '#value' => $node->file
- );
-
- $form['submit'] = array(
- '#type' => 'submit',
- '#value' => 'Submit Job'
- );
- return $form;
- }
- function tripal_bulk_loader_add_loader_job_form_submit ($form, $form_state) {
- global $user;
-
- if (preg_match('/Submit Job/', $form_state['values']['op'])) {
-
- $job_args[1] = $form_state['values']['nid'];
- if (is_readable($form_state['values']['file'])) {
- $fname = basename($form_state['values']['file']);
- tripal_add_job("Bulk Loading Job: $fname",'tripal_bulk_loader', 'tripal_bulk_loader_load_data', $job_args, $user->uid);
- } else {
- drupal_set_message("Can not open ".$form_state['values']['file'].". Job not scheduled.");
- }
- }
- }
- function tripal_bulk_loader_load_data($nid) {
-
- $node = node_load($nid);
- print "Template: ".$node->template->name." (".$node->template_id.")\n";
- print "File: ".$node->file."\n";
-
-
-
- $base_table = $node->template->template_array['module'];
- $base_table_desc = module_invoke_all('chado_'.$base_table.'_schema');
- $related_tables = module_invoke_all('tripal_bulk_loader_'.$base_table.'_related_tables');
-
-
-
-
- $default_values_array = array();
- $field2column = array();
- $relationships2base = array();
- $all_tables = array();
- foreach ($node->template->template_array as $table => $table_groups) {
- foreach ($table_groups as $group => $table_array) {
- if (is_array($table_array)) {
- $all_tables[$table] = $table;
-
- foreach ($table_array['field'] as $field_array) {
- if (preg_match('/table field/', $field_array['type'])) {
- $default_values_array[$table][$group][$field_array['field']] = '';
- $field2column[$table][$group][$field_array['field']] = $field_array['spreadsheet column'];
- } elseif (preg_match('/constant/', $field_array['type'])) {
- $default_values_array[$table][$group][$field_array['field']] = $field_array['constant value'];
- } else {
- print 'WARNING: Unsupported type: '. $field_array['type'] . ' for ' . $table . '.' . $field_array['field']."!\n";
- }
- }
-
-
-
-
-
- if ($related_tables['Foreign Key Relations'][$table]) {
- $relationships2base['foreign key'][$table] = $base_table_desc['foreign keys'][$table];
- }
-
-
-
- if ($related_tables['Direct Relations'][$table]) {
- $table_desc = module_invoke_all('chado_'.$table.'_schema');
- $relationships2base['direct'][$table] = $table_desc['foreign keys'][$base_table];
- }
-
-
- if ($related_tables['Indirect Relations'][$table]) {
- $table_desc = module_invoke_all('chado_'.$table.'_schema');
- $relationships2base['indirect'][$table] = $table_desc['foreign keys'];
- }
-
- }
- }
- }
-
-
-
-
-
-
- $file_handle = fopen($node->file, 'r');
- if (preg_match('/(t|true|1)/', $node->file_has_header)) { fgets($file_handle, 4096); }
- $num_records = 0;
- $num_lines = 0;
- $num_errors = 0;
- while (!feof($file_handle)) {
- $line = array();
- $raw_line = fgets($file_handle, 4096);
- $line = preg_split("/\t/", $raw_line);
- $num_lines++;
-
-
- $tables = $all_tables;
- $values = $default_values_array;
-
-
- foreach ($relationships2base['foreign key'] as $table => $desc) {
-
- if (sizeof($values[$table]) > 1) {
- print "WARNING: ".$table." has more than one group. There can only be one group for a "
- ."table related to the base by a foreign key. Only the first will be used.\n";
- }
-
-
- $group = key($values[$table]);
- $sub_values = $values[$table][$group];
- $sub_values = tripal_bulk_loader_supplement_values_array ($sub_values, $line, $field2column[$table][$group]);
- $values[$table][$group] = $sub_values;
- $success = tripal_bulk_loader_ensure_record ($table, $sub_values);
- if ($success) {
- $num_records++;
- } else {
- $num_errors++;
- }
-
-
- foreach ($desc['columns'] as $foreign_key => $primary_key) {
-
- if (sizeof($values[$base_table]) > 1) {
- print "WARNING: ".$table." has more than one group. There can only be one group for a "
- ."table related to the base by a foreign key. Only the first will be used.\n";
- }
- $base_group = key($values[$base_table]);
- $values[$base_table][$base_group][$foreign_key] = $values[$table][$group];
- }
-
- unset($tables[$table]);
- }
-
-
- $values[$base_table][$base_group] = tripal_bulk_loader_supplement_values_array ($values[$base_table][$base_group], $line, $field2column[$base_table][$base_group]);
- $success = tripal_bulk_loader_ensure_record ($base_table, $values[$base_table][$base_group]);
-
- if (!$success) {
- print "ERROR: Unable to insert base record where base table: ".$base_table.
- " and values array: ".tripal_bulk_loader_flatten_array($values[$base_table][$base_group])."\n";
- $num_errors++;
- } else {
- $num_records++;
- }
- unset($tables[$base_table]);
-
-
- foreach ($relationships2base['direct'] as $table => $desc) {
- foreach ($values[$table] as $group => $sub_values) {
-
- foreach ($desc['columns'] as $foreign_key => $primary_key) {
- $values[$table][$group][$foreign_key] = $values[$base_table][$base_group];
- }
-
- $values[$table][$group] = tripal_bulk_loader_supplement_values_array ($values[$table][$group], $line, $field2column[$table][$group]);
- $success = tripal_bulk_loader_ensure_record ($table, $values[$table][$group]);
- if ($success) {
- $num_records++;
- } else {
- $num_errors++;
- }
- unset($tables[$table]);
- }
- }
-
-
- foreach ($tables as $table) {
-
- if (!$relationships2base['indirect'][$table]) {
- foreach ($values[$table] as $group => $sub_values) {
-
- $values[$table][$group] = tripal_bulk_loader_supplement_values_array ($values[$table][$group], $line, $field2column[$table][$group]);
- $success = tripal_bulk_loader_ensure_record ($table, $values[$table][$group]);
- if ($success) {
- $num_records++;
- } else {
- $num_errors++;
- }
- }
- }
- }
-
-
- foreach ($relationships2base['indirect'] as $table => $desc) {
- foreach ($values[$table] as $group => $sub_values) {
-
- foreach ($desc as $subtable => $subdesc) {
- foreach ($subdesc['columns'] as $foreign_key => $primary_key) {
- $values[$table][$group][$foreign_key] = $values[$subtable][$group];
- }
- }
-
-
- $values[$group][$table] = tripal_bulk_loader_supplement_values_array ($values[$table][$group], $line, $field2column[$table][$group]);
- $success = tripal_bulk_loader_ensure_record ($table, $values[$table][$group]);
- if ($success) {
- $num_records++;
- } else {
- $num_errors++;
- }
- }
- }
-
- }
-
- print "\nNumber of Records Inserted:".$num_records."\n";
- print "Number of Lines in File:".$num_lines."\n";
- print "Number of Errors: ".$num_errors."\n\n";
-
- if (!$num_errors) {
- db_query("UPDATE {tripal_bulk_loader} SET job_status='%s' WHERE nid=%d", 'successfully loaded', $nid);
- } else {
- db_query("UPDATE {tripal_bulk_loader} SET job_status='%s' WHERE nid=%d", 'load attempted', $nid);
- }
- }
- function tripal_bulk_loader_supplement_values_array ($values, $line, $field2column) {
- foreach ($values as $field => $value) {
- $column = $field2column[$field] - 1;
- if ($line[$column] OR (!$values[$field])) {
- $values[$field] = $line[$column];
- }
- }
-
- return $values;
- }
- function tripal_bulk_loader_ensure_record ($table, $values) {
-
-
- $flattened_values = tripal_bulk_loader_flatten_array($values);
-
-
- $has_record = tripal_core_chado_select($table, array_keys($values), $values, array('has_record' => TRUE));
- if ($has_record) {
- print "\tWARNING: Record already exists in $table where ".implode(',',$flattened_values).".\n";
- return true;
- } else {
-
- $success = tripal_core_chado_insert($table, $values);
- if (!$success) {
- print "ERROR: Unable to insert the following record into $table: ".implode(',',$flattened_values)."\n";
- return false;
- } else {
- return true;
- }
- }
-
- }
- function tripal_bulk_loader_flatten_array ($values) {
- $flattened_values = array();
-
- foreach ($values as $k => $v) {
- if (is_array($v)) {
- $vstr = array();
- foreach ($v as $vk => $vv) {
- if (strlen($vv) > 20) {
- $vstr[] = $vk .'=>'. substr($vv, 0, 20) . '...';
- } else {
- $vstr[] = $vk .'=>'. $vv;
- }
- }
- $v = '{'. implode(',',$vstr) .'}';
- } elseif (strlen($v) > 20) {
- $v = substr($v, 0, 20) . '...';
- }
- $flattened_values[] = $k .'=>'. $v;
- }
-
- return $flattened_values;
- }
|