فهرست منبع

Now keeps track of progress within a constant set through use of a file. Refreshing the node updates the job progress, taking into account the progress within the constant set.

Lacey Sanderson 12 سال پیش
والد
کامیت
21fd545316
2فایلهای تغییر یافته به همراه126 افزوده شده و 3 حذف شده
  1. 55 3
      tripal_bulk_loader/tripal_bulk_loader.loader.inc
  2. 71 0
      tripal_bulk_loader/tripal_bulk_loader.module

+ 55 - 3
tripal_bulk_loader/tripal_bulk_loader.loader.inc

@@ -14,8 +14,11 @@
 function tripal_bulk_loader_add_loader_job_form($form_state, $node) {
   $form = array();
 
-  if (($node->job_status == 'Loading...') AND (variable_get('tripal_bulk_loader_transactions',   'row') != 'none')) {
-    drupal_set_message("Job Progress and Loading Summary will only be updated at the end of each constant set.","warning");
+  // --notify--
+  if ($node->job_status == 'Loading...' AND (variable_get('tripal_bulk_loader_transactions',   'row') != 'none')) {
+    drupal_set_message(t("The Loading Summary only updates at the end of each constant set.
+      %num records have already been inserted; however, they won't be available until the
+      current constant set is full loaded and no errors are encountered.", array('%num' => $progress->num_records)), 'warning');
   }
 
   $form['nid'] = array(
@@ -117,7 +120,7 @@ function tripal_bulk_loader_add_loader_job_form_submit($form, $form_state) {
  * Note: Instead of returning a value this function updates the tripal_bulk_loader.status.
  *   Errors are thrown through watchdog and can be viewed at admin/reports/dblog.
  */
-function tripal_bulk_loader_load_data($nid) {
+function tripal_bulk_loader_load_data($nid, $job_id) {
 
   // ensure no timeout
   set_time_limit(0);
@@ -328,6 +331,7 @@ function tripal_bulk_loader_load_data($nid) {
       $data_keys = array_keys($data);
       foreach ($data_keys as $priority) {
         $status = process_data_array_for_line($priority, $data, $default_data, $field2column, $record2priority, $line, $nid, $num_lines, $group_index);
+        tripal_bulk_loader_progress_file_track_job($job_id, $status);
         if (!$status ) {
           // Encountered an error
           if ($transactions) {
@@ -338,6 +342,8 @@ function tripal_bulk_loader_load_data($nid) {
         }
       } // end of foreach table in default data array
 
+      tripal_bulk_loader_progress_file_track_job($job_id, FALSE, TRUE);
+
       if ($failed) {
         break;
       }
@@ -369,6 +375,7 @@ function tripal_bulk_loader_load_data($nid) {
     }
 
     tripal_bulk_loader_progress_bar($total_lines,$total_lines);
+    tripal_bulk_loader_progress_file_track_job($job_id, FALSE, FALSE, TRUE);
   } //end of foreach constant set
 
   // check that data was inserted and update job_status
@@ -756,3 +763,48 @@ function tripal_bulk_loader_progress_bar($current=0, $total=100, $size=50) {
   }
 
 }
+
+/**
+ * Keep track of progress in a file rather than the database
+ *
+ * This provides an alternative method to keep track of progress that doesn't require the
+ * database. It was needed because you can't switch databases within a transaction...
+ * Waiting until the end of a constant set is much too long to wait for any indication
+ * that things are working.
+ *
+ * Each line represents a line processed in the loading file. Each period (.) represents
+ * a successfully inserted record.
+ *
+ * Tracking is best-effort: if the progress file cannot be opened nothing is written and
+ * the function returns quietly so that the load itself is never interrupted.
+ *
+ * @param $job_id
+ *   The ID of the current tripal job
+ * @param $record_added
+ *   A boolean indicating whether a record was added successfully
+ * @param $line_complete
+ *   A boolean indicating whether the current line is finished
+ * @param $close
+ *   A boolean indicating that the file should be closed
+ */
+function tripal_bulk_loader_progress_file_track_job($job_id, $record_added, $line_complete = FALSE, $close = FALSE) {
+  // Keep the handle in a static rather than variable_get()/variable_set(): a resource
+  // cannot be serialized into the variables table, and variable_set() writes to the
+  // database, which is exactly what this function is meant to avoid.
+  static $file_handle = NULL;
+
+  // open (truncating) the progress file for writing if not already open
+  if (!is_resource($file_handle)) {
+    $file_handle = fopen('/tmp/tripal_bulk_loader_progress-' . $job_id . '.out', 'w');
+  }
+
+  // could not open the file: skip tracking; the next call will try again
+  if (!is_resource($file_handle)) {
+    $file_handle = NULL;
+    return;
+  }
+
+  if ($record_added) {
+    fwrite($file_handle, '.');
+  }
+
+  if ($line_complete) {
+    fwrite($file_handle, "\n");
+  }
+
+  // close the file if finished; the next call re-opens (and truncates) it
+  if ($close) {
+    fclose($file_handle);
+    $file_handle = NULL;
+  }
+}

+ 71 - 0
tripal_bulk_loader/tripal_bulk_loader.module

@@ -328,6 +328,7 @@ function tripal_bulk_loader_load($node) {
   $node->title = 'Bulk Loading Job: ' . $node->loader_name;
 
   // Add job details
+  $progress = tripal_bulk_loader_progess_file_get_progress($node->job_id);
   $sql = "SELECT * FROM {tripal_jobs} WHERE job_id=%d";
   $node->job = db_fetch_object(db_query($sql, $node->job_id));
 
@@ -457,6 +458,76 @@ function tripal_bulk_loader_preprocess_tripal_bulk_loader_template(&$variables)
 
 }
 
+/**
+ * Get the progress of the current constant set from the progress file
+ *
+ * When transactions are used, database updates to drupal cannot be made. Thus a separate
+ * method to keep track of progress was implemented: save a period to the file for each
+ * record successfully inserted; each line in the file represents a processed line.
+ *
+ * @param $job_id
+ *   The id of the tripal job to check the progress of
+ * @param $update_progress
+ *   Whether to save the calculated progress to the tripal job (default TRUE)
+ *
+ * @return
+ *   An object with the following properties (each is 0 when it could not be determined):
+ *     num_lines = the number of lines in the file processed so far
+ *     total_lines = the total number of lines in the input file
+ *     percent_file = the percent the input file has been loaded
+ *     num_constant_sets_loaded = the number of constant sets already fully loaded
+ *     total_percent = the percent of the whole job (all constant sets) completed
+ *     num_records = the number of records successfully inserted
+ */
+function tripal_bulk_loader_progess_file_get_progress($job_id, $update_progress = TRUE) {
+  // defaults, so every returned property is defined whichever branch is taken
+  $num_lines = $num_records = $total_lines = 0;
+  $percent = $total_percent = $num_constant_sets_loaded = 0;
+
+  // Count in PHP instead of the shell: newlines are processed lines and periods are
+  // inserted records (`grep -c "."` counted non-empty lines, not periods).
+  // The file is read in chunks so that a large progress file is never held in memory.
+  $filename = '/tmp/tripal_bulk_loader_progress-' . $job_id . '.out';
+  $handle = is_readable($filename) ? fopen($filename, 'r') : FALSE;
+  if ($handle) {
+    while (($chunk = fread($handle, 65536)) !== FALSE AND $chunk !== '') {
+      $num_lines += substr_count($chunk, "\n");
+      $num_records += substr_count($chunk, '.');
+    }
+    fclose($handle);
+  }
+
+  $job = db_fetch_object(db_query("SELECT j.*, b.file, b.file_has_header, c.num as num_constant_sets
+                              FROM {tripal_jobs} j
+                              LEFT JOIN {tripal_bulk_loader} b ON b.job_id=j.job_id
+                              LEFT JOIN (
+                                  SELECT nid, count(distinct(group_id)) as num
+                                  FROM {tripal_bulk_loader_constants}
+                                  GROUP BY nid
+                                ) c ON c.nid=b.nid
+                              WHERE j.job_id=%d", $job_id));
+
+  // $job is checked first because the query yields no object for an unknown job id
+  if ($job AND $job->num_constant_sets) {
+    $num_constant_sets_loaded = round($job->progress / (100 / $job->num_constant_sets), 4);
+
+    // If the next constant set has started loading
+    if ($job->num_constant_sets != $num_constant_sets_loaded) {
+
+      // total lines in input file (the path is escaped because it is handed to the shell)
+      if ($job->file AND is_readable($job->file)) {
+        $total_lines = (int) trim((string) shell_exec('wc --lines < ' . escapeshellarg($job->file)));
+        if ($job->file_has_header) {
+          $total_lines--;
+        }
+      }
+
+      // percent of the current constant set loaded; guarded so that an empty or
+      // header-only input file cannot cause a division by zero, and capped at 100
+      if ($total_lines > 0) {
+        $percent = min(100, round($num_lines / $total_lines * 100, 2));
+      }
+
+      // percent of the total job = ((<# fully loaded constant sets> * 100)
+      //                           + <percent of current constant set>)
+      //                           / <total number of constant sets>
+      $total_percent = (($num_constant_sets_loaded * 100) + $percent) / $job->num_constant_sets;
+
+      // Only save progress made part-way through a constant set. The original `OR` made
+      // this guard always true; the intent appears to be to skip the 0 and 100 edge
+      // cases (NOTE(review): presumably the loader records a finished set itself).
+      if ($update_progress AND $percent != 0 AND $percent != 100) {
+        tripal_job_set_progress($job_id, round($total_percent, 0));
+      }
+    }
+  }
+
+  return (object) array(
+    'num_lines' => $num_lines,
+    'total_lines' => $total_lines,
+    'percent_file' => $percent,
+    'num_constant_sets_loaded' => $num_constant_sets_loaded,
+    'total_percent' => $total_percent,
+    'num_records' => $num_records
+  );
+}
+
 /**
  * Implements hook_job_describe_args()
  * Specifically to make viewing past tripal jobs more readable for jobs registered by this module