Procházet zdrojové kódy

Now keeps track of progress within a constant set through use of a file. Refreshing the node updates the job progress, taking into account the progress within the current constant set.

Lacey Sanderson před 12 roky
rodič
revize
21fd545316

+ 55 - 3
tripal_bulk_loader/tripal_bulk_loader.loader.inc

@@ -14,8 +14,11 @@
 function tripal_bulk_loader_add_loader_job_form($form_state, $node) {
   $form = array();
 
-  if (($node->job_status == 'Loading...') AND (variable_get('tripal_bulk_loader_transactions',   'row') != 'none')) {
-    drupal_set_message("Job Progress and Loading Summary will only be updated at the end of each constant set.","warning");
+  // --notify--
+  if ($node->job_status == 'Loading...' AND (variable_get('tripal_bulk_loader_transactions',   'row') != 'none')) {
+    drupal_set_message(t("The Loading Summary only updates at the end of each constant set.
+      %num records have already been inserted; however, they won't be available until the
+      current constant set is full loaded and no errors are encountered.", array('%num' => $progress->num_records)), 'warning');
   }
 
   $form['nid'] = array(
@@ -117,7 +120,7 @@ function tripal_bulk_loader_add_loader_job_form_submit($form, $form_state) {
  * Note: Instead of returning a value this function updates the tripal_bulk_loader.status.
  *   Errors are thrown through watchdog and can be viewed at admin/reports/dblog.
  */
-function tripal_bulk_loader_load_data($nid) {
+function tripal_bulk_loader_load_data($nid, $job_id) {
 
   // ensure no timeout
   set_time_limit(0);
@@ -328,6 +331,7 @@ function tripal_bulk_loader_load_data($nid) {
       $data_keys = array_keys($data);
       foreach ($data_keys as $priority) {
         $status = process_data_array_for_line($priority, $data, $default_data, $field2column, $record2priority, $line, $nid, $num_lines, $group_index);
+        tripal_bulk_loader_progress_file_track_job($job_id, $status);
         if (!$status ) {
           // Encountered an error
           if ($transactions) {
@@ -338,6 +342,8 @@ function tripal_bulk_loader_load_data($nid) {
         }
       } // end of foreach table in default data array
 
+      tripal_bulk_loader_progress_file_track_job($job_id, FALSE, TRUE);
+
       if ($failed) {
         break;
       }
@@ -369,6 +375,7 @@ function tripal_bulk_loader_load_data($nid) {
     }
 
     tripal_bulk_loader_progress_bar($total_lines,$total_lines);
+    tripal_bulk_loader_progress_file_track_job($job_id, FALSE, FALSE, TRUE);
   } //end of foreach constant set
 
   // check that data was inserted and update job_status
@@ -756,3 +763,48 @@ function tripal_bulk_loader_progress_bar($current=0, $total=100, $size=50) {
   }
 
 }
+
+/**
+ * Keep track of progress in a file rather than in the database
+ *
+ * This provides an alternative method to keep track of progress that doesn't require the
+ * database. It was needed because you can't switch databases within a transaction...
+ * Waiting until the end of a constant set is much too long to wait for any indication
+ * that things are working.
+ *
+ * Each line represents a line processed in the loading file. Each period (.) represents
+ * a successfully inserted record.
+ *
+ * The progress file is /tmp/tripal_bulk_loader_progress-<job_id>.out. It is opened in
+ * 'w' mode, so it is truncated every time it is (re)opened after having been closed.
+ *
+ * @param $job_id
+ *   The ID of the current tripal job
+ * @param $record_added
+ *   A boolean indicating whether a record was added successfully
+ * @param $line_complete
+ *   A boolean indicating whether the current line is finished
+ * @param $close
+ *   A boolean indicating that the file should be closed
+ */
+function tripal_bulk_loader_progress_file_track_job($job_id, $record_added, $line_complete = FALSE, $close = FALSE) {
+  // Open handles for the current request, keyed by job id. A stream resource only has
+  // meaning inside the process that opened it, so it is kept in a static rather than
+  // in a persistent Drupal variable.
+  static $file_handles = array();
+
+  // open the file for writing if not already
+  if (empty($file_handles[$job_id])) {
+    $file_handle = fopen('/tmp/tripal_bulk_loader_progress-' . $job_id . '.out', 'w');
+    if (!$file_handle) {
+      // Progress tracking is best effort: fopen() has already raised a warning, so do
+      // not follow it up with fwrite()/fclose() calls on an invalid handle.
+      return;
+    }
+    $file_handles[$job_id] = $file_handle;
+  }
+  $file_handle = $file_handles[$job_id];
+
+  if ($record_added) {
+    fwrite($file_handle, '.');
+  }
+
+  if ($line_complete) {
+    fwrite($file_handle, "\n");
+    // push completed lines out so that a reader polling the file can see them
+    fflush($file_handle);
+  }
+
+  // close the file if finished
+  if ($close) {
+    fclose($file_handle);
+    unset($file_handles[$job_id]);
+  }
+}

+ 71 - 0
tripal_bulk_loader/tripal_bulk_loader.module

@@ -328,6 +328,7 @@ function tripal_bulk_loader_load($node) {
   $node->title = 'Bulk Loading Job: ' . $node->loader_name;
 
   // Add job details
+  $progress = tripal_bulk_loader_progess_file_get_progress($node->job_id);
   $sql = "SELECT * FROM {tripal_jobs} WHERE job_id=%d";
   $node->job = db_fetch_object(db_query($sql, $node->job_id));
 
@@ -457,6 +458,76 @@ function tripal_bulk_loader_preprocess_tripal_bulk_loader_template(&$variables)
 
 }
 
+/**
+ * Get the progress of the current constant set from the progress file
+ *
+ * When transactions are used, database updates to drupal cannot be made. Thus a separate
+ * method to keep track of progress was implemented: save a period to the file for each
+ * record successfully inserted; each line in the file represents a processed line.
+ *
+ * @param $job_id
+ *   The id of the tripal job to check the progress of
+ * @param $update_progress
+ *   A boolean indicating whether the progress of the tripal job should be updated with
+ *   the percentage calculated here (default TRUE)
+ *
+ * @return
+ *   An object with the following properties:
+ *     num_lines = the number of lines in the file processed so far
+ *     total_lines = the total number of lines in the input file
+ *     percent_file = the percent the input file has been loaded
+ *     num_constant_sets_loaded = the number of constant sets loaded, derived from the
+ *       job progress
+ *     total_percent = the percent of the whole job that has been loaded
+ *     num_records = the number of records successfully inserted
+ *   total_lines, percent_file, num_constant_sets_loaded and total_percent are NULL when
+ *   they could not be calculated (job not found, no constant sets, or all constant sets
+ *   already loaded).
+ */
+function tripal_bulk_loader_progess_file_get_progress($job_id, $update_progress = TRUE) {
+  $filename = '/tmp/tripal_bulk_loader_progress-' . $job_id . '.out';
+
+  // Each newline is a processed line and each period is an inserted record. Counting is
+  // done in PHP: `grep -c "."` counts matching lines rather than periods, and passing
+  // file names through the shell breaks on unusual paths.
+  $counts = _tripal_bulk_loader_progress_file_count($filename, array("\n", '.'));
+  $num_lines = $counts["\n"];
+  $num_records = $counts['.'];
+
+  // defined up front so that the returned object never refers to undefined variables
+  $total_lines = NULL;
+  $percent = NULL;
+  $num_constant_sets_loaded = NULL;
+  $total_percent = NULL;
+
+  $job = db_fetch_object(db_query("SELECT j.*, b.file, b.file_has_header, c.num as num_constant_sets
+                              FROM {tripal_jobs} j
+                              LEFT JOIN {tripal_bulk_loader} b ON b.job_id=j.job_id
+                              LEFT JOIN (
+                                  SELECT nid, count(distinct(group_id)) as num
+                                  FROM {tripal_bulk_loader_constants}
+                                  GROUP BY nid
+                                ) c ON c.nid=b.nid
+                              WHERE j.job_id=%d", $job_id));
+  if ($job AND $job->num_constant_sets) {
+    // NOTE(review): this is derived from the job progress, which this function itself
+    // updates below with a value that includes the partially loaded constant set. If
+    // tripal_job_set_progress() writes to the same progress column, a later call would
+    // see a fractional number here and count the partial set twice -- confirm.
+    $num_constant_sets_loaded = round($job->progress / (100 / $job->num_constant_sets), 4);
+
+    // If the next constant set has started loading
+    if ($job->num_constant_sets != $num_constant_sets_loaded) {
+
+      // total lines in input file
+      $input_counts = _tripal_bulk_loader_progress_file_count($job->file, array("\n"));
+      $total_lines = $input_counts["\n"];
+      if ($job->file_has_header) {
+        $total_lines--;
+      }
+
+      // percent of the current constant set loaded
+      // (an empty or unreadable input file must not cause a division by zero)
+      $percent = ($total_lines > 0) ? round($num_lines / $total_lines * 100, 2) : 0;
+
+      // percent of the total job = ((<# fully loaded constant sets> * 100)
+      //                             + <percent of current constant set>)
+      //                           / <total number of constant sets>
+      $total_percent = (($num_constant_sets_loaded * 100) + $percent) / $job->num_constant_sets;
+
+      // update the progress of the job, but only while the current constant set is
+      // part way through (the original OR condition here was always true)
+      if ($update_progress AND $percent != 0 AND $percent != 100) {
+        tripal_job_set_progress($job_id, round($total_percent, 0));
+      }
+    }
+  }
+
+  return (object) array(
+    'num_lines' => $num_lines,
+    'total_lines' => $total_lines,
+    'percent_file' => $percent,
+    'num_constant_sets_loaded' => $num_constant_sets_loaded,
+    'total_percent' => $total_percent,
+    'num_records' => $num_records
+  );
+}
+
+/**
+ * Count occurrences of single characters in a file
+ *
+ * The file is read in chunks so that large files are never held in memory whole.
+ *
+ * @param $filename
+ *   The path of the file to scan
+ * @param $characters
+ *   An array of single-character strings to count
+ *
+ * @return
+ *   An array keyed by character containing the number of occurrences of each. All
+ *   counts are 0 if the file does not exist or cannot be read.
+ */
+function _tripal_bulk_loader_progress_file_count($filename, $characters) {
+  $counts = array();
+  foreach ($characters as $character) {
+    $counts[$character] = 0;
+  }
+
+  if (!is_string($filename) OR !is_file($filename) OR !is_readable($filename)) {
+    return $counts;
+  }
+
+  $handle = fopen($filename, 'r');
+  if (!$handle) {
+    return $counts;
+  }
+
+  while (!feof($handle)) {
+    $chunk = fread($handle, 65536);
+    if ($chunk === FALSE OR $chunk === '') {
+      break;
+    }
+    foreach ($characters as $character) {
+      $counts[$character] += substr_count($chunk, $character);
+    }
+  }
+  fclose($handle);
+
+  return $counts;
+}
+
+
 /**
  * Implements hook_job_describe_args()
  * Specifically to make viewing past tripal jobs more readable for jobs registered by this module