Procházet zdrojové kódy

Restoration of functionality of National Ag Library publication loader

dsenalik před 5 roky
rodič
revize
33b5f9eeea

+ 6 - 5
tripal_chado/api/modules/tripal_chado.pub.api.inc

@@ -368,7 +368,7 @@ function chado_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE,
 
   $message = "Importing of publications is performed using a database transaction. " .
     "If the load fails or is terminated prematurely then the entire set of " .
-    "deletions is rolled back and will not be found in the database";
+    "insertions/updates is rolled back and will not be found in the database";
   tripal_report_error($message_type, TRIPAL_INFO, $message, [], $message_opts);
 
 
@@ -505,7 +505,7 @@ function chado_execute_pub_importer($import_id, $publish = TRUE,
 
   $message = "Importing of publications for this importer is performed using a database transaction. " .
     "If the load fails or is terminated prematurely then the entire set of " .
-    "deletions is rolled back and will not be found in the database";
+    "insertions/updates is rolled back and will not be found in the database";
   tripal_report_error($message_type, TRIPAL_INFO, $message, [], $message_opts);
 
 
@@ -532,9 +532,10 @@ function chado_execute_pub_importer($import_id, $publish = TRUE,
     // Loop until we have a $pubs array that does not have
     // our requested number of records.  This means we've hit the end
     do {
-      // retrieve the pubs for this page. We'll retreive 100 at a time
+      // retrieve the pubs for this page. We'll retrieve 100 at a time
+      $npages = isset($num_pubs)?(intval($num_pubs/$num_to_retrieve)+1):'?';  // will be 0 to 99 in last page
       tripal_report_error($message_type, TRIPAL_INFO,
-        "Querying !remote_db for up to !num pubs that match the criteria.",
+        "Page ".($page+1)." of $npages. Querying !remote_db for up to !num pubs that match the criteria.",
         [
           '!num' => $num_to_retrieve,
           '!remote_db' => $remote_db,
@@ -669,7 +670,7 @@ function chado_reimport_publications($do_contact = FALSE, $dbxref = NULL,
 
   $message = "Importing of publications for this importer is performed using a database transaction. " .
     "If the load fails or is terminated prematurely then the entire set of " .
-    "deletions is rolled back and will not be found in the database";
+    "insertions/updates is rolled back and will not be found in the database";
   tripal_report_error($message_type, TRIPAL_INFO, $message, [], $message_opts);
 
   $transaction = db_transaction();

+ 81 - 31
tripal_chado/includes/loaders/tripal_chado.pub_importer_AGL.inc

@@ -30,8 +30,8 @@ function tripal_pub_remote_alter_form_AGL($form, $form_state, $num_criteria = 1)
   // So far we haven't been able to get AGL to filter results to only
   // include pubs by the XX number days in the past.  So, we will
   // change the 'days' element to be the year to query
-  $form['themed_element']['days']['#title'] = t('Year');
-  $form['themed_element']['days']['#description'] = t('Please enter a year to limit records by the year they were published, created or modified in the database.');
+  $form['themed_element']['days']['#title'] = t('Earliest year of publication');
+  $form['themed_element']['days']['#description'] = t('Filter returned publications for those that have been published no earlier than this year.');
 
   // The Journal Name filter doesn't seem to work, so remove it
   for ($i = 1; $i <= $num_criteria; $i++) {
@@ -55,11 +55,15 @@ function tripal_pub_remote_alter_form_AGL($form, $form_state, $num_criteria = 1)
  */
 function tripal_pub_remote_validate_form_AGL($form, $form_state) {
   $days = trim($form_state['values']["days"]);
+  $latestyear = trim($form_state['values']["latestyear"]);
   $num_criteria = $form_state['values']['num_criteria'];
 
   if ($days and !preg_match('/^\d\d\d\d$/', $days)) {
     form_set_error("days", "Please enter a four digit year.");
   }
+  if ($latestyear and !preg_match('/^\d\d\d\d$/', $latestyear)) {
+    form_set_error("latestyear", "Please enter a four digit year.");
+  }
 
   $num_ids = 0;
   for ($i = 1; $i <= $num_criteria; $i++) {
@@ -96,9 +100,10 @@ function tripal_pub_remote_validate_form_AGL($form, $form_state) {
  * @ingroup tripal_pub
  */
 function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page) {
-  // get some values from the serach array
+  // get some values from the search array
   $num_criteria = $search_array['num_criteria'];
   $days = array_key_exists('days', $search_array) ? $search_array['days'] : '';
+  $latestyear = array_key_exists('latestyear', $search_array) ? $search_array['latestyear'] : '';
 
   // set some defaults
   $search_array['limit'] = $num_to_retrieve;
@@ -342,14 +347,15 @@ function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page) {
     }
   }
 
-  // for AGL the 'days' form element was converted to represent the year
-  if ($days) {
-    $ccl .= "and year=($days)";
-  }
+  // for AGL the 'days' form element was converted to represent the earliest year
+  // ! these search terms do not work for AGL, disable here and filter the results returned
+  // if ($days) { $ccl .= "and year>=($days) "; }
+  // if ($latestyear) { $ccl .= "and year<=($latestyear) "; }
 
   // remove any preceding 'and' or 'or'
   $ccl = preg_replace('/^\s*(and|or)/', '', $ccl);
 
+
   // yaz_connect() prepares for a connection to a Z39.50 server. This function is non-blocking
   // and does not attempt to establish a connection - it merely prepares a connect to be
   // performed later when yaz_wait() is called.
@@ -364,12 +370,17 @@ function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page) {
   // the search query is built using CCL, we need to first
   // configure it so it can map the attributes to defined identifiers
   // The attribute set used by AGL can be found at the bottom of this page:
-  // http://agricola.nal.usda.gov/help/z3950.html
+  // https://agricola.nal.usda.gov/help/z3950.html
+  //   Boolean searching (AND, OR, NOT) is supported on search types with
+  //   a position attribute of 3 only.
   //
   // More in depth details:  http://www.loc.gov/z3950/agency/bib1.html
   //
-  // CCL Syntax: http://www.indexdata.com/yaz/doc/tools.html#CCL
+  // CCL Syntax: https://www.indexdata.com/yaz/doc/tools.html#CCL
   //
+  // the abstract field u=62, year u=30, and journal u=1033 are not in the documented
+  // list of supported values at https://agricola.nal.usda.gov/help/z3950.html
+  // publisherdate u=31 is listed, but if used returns zero results
   $fields = [
     "title" => "u=4",
     "author" => "u=1003",
@@ -393,10 +404,9 @@ function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page) {
 
   // get the total number of records
   $total_records = tripal_pub_AGL_count($yazc, $search_str);
-
-  // get the pubs in the specified rang
+  // get the pubs in the specified range
   $start = $page * $num_to_retrieve;
-  $results = tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records);
+  $results = tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records, $days, $latestyear);
 
   // close the connection
   yaz_close($yazc);
@@ -425,8 +435,18 @@ function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page) {
  *
  * @ingroup tripal_pub
  */
-function tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records) {
-  yaz_range($yazc, 1, $total_records);
+function tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records, $startyear, $endyear) {
+  // The original code was: yaz_range($yazc, 1, $total_records);
+  // and for large queries we received an error: ERROR retrieving records from AGL: (10007) Timeout
+  // or for an intermediate size query: ERROR retrieving records from AGL: (10004) Connection lost
+  // Empirical testing shows errors started somewhere between 1550 and 1575 records
+  // If we use just the appropriate values for this range to the call to yaz_range then it works,
+  // but we can't exceed $total_records
+  $local_to_retrieve = $num_to_retrieve;
+  if (($start + $num_to_retrieve) > $total_records) {
+    $local_to_retrieve = $total_records - $start;
+  }
+  yaz_range($yazc, $start, $local_to_retrieve); // $start is 0-based
   if (!yaz_present($yazc)) {
     $error_no = yaz_errno($yazc);
     $error_msg = yaz_error($yazc);
@@ -463,8 +483,21 @@ function tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $tot
         'pubs' => [],
       ];
     }
+
     // parse the pub XML
     $pub = tripal_pub_AGL_parse_pubxml($pub_xml);
+
+    // since year limits don't work when searching, implement them here for the
+    // returned publications. This is a very ugly solution, because the count of
+    // publications is now wrong, the xml for every pub must still be downloaded,
+    // and the Test Importer will display pubs that will be later filtered out.
+    $pass = 1;
+    if (array_key_exists('Year', $pub)) {
+      if ( ($startyear) and ($pub['Year']<$startyear) ) { $pass = 0; }
+      if ( ($endyear) and ($pub['Year']>$endyear) ) { $pass = 0; }
+    }
+    $pub['passfilter'] = $pass;
+
     $pubs[] = $pub;
   }
   return [
@@ -583,17 +616,24 @@ function tripal_pub_AGL_parse_pubxml($pub_xml) {
             '11' => 'Nov',
             '12' => 'Dec',
           ];
-          $date0 = substr($value, 0, 6);  // date entered on file
-          $date1 = substr($value, 7, 4);  // year of publication
-          $date2 = substr($value, 11, 4); // month of publication
-          $place = substr($value, 15, 3);
-          $lang = substr($value, 35, 3);
+          $date0 = mb_substr($value, 0, 6);  // date entered on file
+          $typeofdate = mb_substr($value, 6, 1);  // e = detailed date
+          $date1 = mb_substr($value, 7, 4);  // year of publication
+          $date2 = mb_substr($value, 11, 2); // month of publication
+          $date3 = mb_substr($value, 13, 2); // day of publication
+          $place = mb_substr($value, 15, 3);
+          $lang = mb_substr($value, 35, 3);
           if (preg_match('/\d\d\d\d/', $date1)) {
             $pub['Year'] = $date1;
             $pub['Publication Date'] = $date1;
           }
-          if (preg_match('/\d\d/', $date2)) {
-            $pub['Publication Date'] = $date1 . " " . $month[substr($date2, 0, 2)] . " " . substr($date2, 3, 2);
+          if (($typeofdate == 'e') and (preg_match('/\d\d/', $date2))) {
+            if ( ( $date2 >= 1 ) and ( $date2 <= 12 ) ) {
+              $pub['Publication Date'] = $date1 . " " . $month[$date2] . " " . $date3;
+            }
+            else {
+              drupal_set_message("Invalid month value \"$date2\" extracted from \"$value\"", "warning");
+            }
           }
           if (!preg_match('/\s+/', $place)) {
             $pub['Published Location'] = $place;
@@ -892,7 +932,12 @@ function tripal_pub_AGL_parse_pubxml($pub_xml) {
   }
 
   // build the full authors list
-  if (is_array($pub['Author List'])) {
+  if (!array_key_exists('Author List', $pub)) {
+    // there is a constraint in chado.pubprop against value being null 
+    $pub['Author List'] = array(['Surname' => 'anonymous']);
+    $pub['Authors'] = 'anonymous';
+  }
+  else if (is_array($pub['Author List'])) {
     $authors = '';
     foreach ($pub['Author List'] as $author) {
       if (array_key_exists('valid', $author) and $author['valid'] == 'N') {
@@ -913,7 +958,7 @@ function tripal_pub_AGL_parse_pubxml($pub_xml) {
         }
       }
     }
-    $authors = substr($authors, 0, -2);
+    $authors = mb_substr($authors, 0, -2);
     $pub['Authors'] = $authors;
   }
   else {
@@ -926,13 +971,18 @@ function tripal_pub_AGL_parse_pubxml($pub_xml) {
     $pub['Abstract'] = preg_replace('/[\p{So}]/u', '', mb_convert_encoding($pub['Abstract'], 'UTF-8', 'HTML-ENTITIES'));
   }
   $newauths = [];
-  foreach ($pub['Author List'] AS $auth) {
-    foreach ($auth AS $k => $v) {
-      $auth[$k] = preg_replace('/[\p{So}]/u', '', mb_convert_encoding($v, 'UTF-8', 'HTML-ENTITIES'));
+  if (array_key_exists('Author List', $pub)) {
+    foreach ($pub['Author List'] AS $auth) {
+      foreach ($auth AS $k => $v) {
+        $auth[$k] = preg_replace('/[\p{So}]/u', '', mb_convert_encoding($v, 'UTF-8', 'HTML-ENTITIES'));
+      }
+      array_push($newauths, $auth);
     }
-    array_push($newauths, $auth);
+    $pub['Author List'] = $newauths;
+  }
+  else {
+    $pub['Author List'] = array(['Surname' => 'anonymous']);
   }
-  $pub['Author List'] = $newauths;
 
   // build the citation
   $pub['Citation'] = chado_pub_create_citation($pub);
@@ -1009,11 +1059,11 @@ function tripal_pub_remote_search_AGL_get_author($xml, $ind1) {
           $first_names = explode(' ', $author['Given Name']);
           $author['First Initials'] = '';
           foreach ($first_names as $index => $name) {
-            $author['First Initials'] .= substr($name, 0, 1);
+            $author['First Initials'] .= mb_substr($name, 0, 1);
           }
         }
-        if ($ind1 == 3) { // A family name
-
+        if ($ind1 == 3) { // A family name, occurs rarely
+          $author['Surname'] = $value;
         }
         break;
     }

+ 0 - 1
tripal_chado/includes/loaders/tripal_chado.pub_importer_PMID.inc

@@ -30,7 +30,6 @@ function tripal_pub_remote_alter_form_PMID($form, $form_state, $num_criteria = 1
   for ($i = 1; $i <= $num_criteria; $i++) {
     $form['themed_element']['criteria'][$i]["scope-$i"]['#options']['abstract'] = 'Abstract/Title';
   }
-
   return $form;
 }
 

+ 107 - 70
tripal_chado/includes/loaders/tripal_chado.pub_importers.inc

@@ -133,13 +133,9 @@ function tripal_pub_importer_setup_page($action = 'new', $pub_import_id = NULL)
     drupal_set_message(t('If you want to create contact pages for authors, you must first ') . l(t('load the Tripal Contact Ontology'), 'admin/tripal/loaders/chado_vocabs/obo_loader'), 'error');
   }
 
-  /**
-   * Commenting the AGL message out until we get AGL working again.
-   * spf 6/22/2018
-   * if(!extension_loaded ('yaz')){
-   * drupal_set_message(t('<b>Note:</b> In order to create an importer using the USDA National Agricultural Library (AGL) you must install the yaz libraries. See the ') . l(t('Pub Module help page'), 'admin/tripal/legacy/tripal_pub/help') . ' for assistance.  If you do not want to use AGL you can ignore this warning.', 'warning');
-   * }
-   */
+  if(!extension_loaded ('yaz')){
+    drupal_set_message(t('<b>Note:</b> In order to create an importer using the USDA National Agricultural Library (AGL) you must install the yaz libraries. See the ') . l(t('Pub Module help page'), 'admin/tripal/legacy/tripal_pub/help') . ' for assistance.  If you do not want to use AGL you can ignore this warning.', 'warning');
+  }
 
   // generate the search form
   $form = drupal_get_form('tripal_pub_importer_setup_form', $pub_import_id, $action);
@@ -152,11 +148,13 @@ function tripal_pub_importer_setup_page($action = 'new', $pub_import_id = NULL)
     $remote_db = array_key_exists('remote_db', $_SESSION['tripal_pub_import']) ? $_SESSION['tripal_pub_import']['remote_db'] : '';
     $num_criteria = array_key_exists('num_criteria', $_SESSION['tripal_pub_import']) ? $_SESSION['tripal_pub_import']['num_criteria'] : '';
     $days = array_key_exists('days', $_SESSION['tripal_pub_import']) ? $_SESSION['tripal_pub_import']['days'] : '';
+    $latestyear = array_key_exists('latestyear', $_SESSION['tripal_pub_import']) ? $_SESSION['tripal_pub_import']['latestyear'] : '';
 
     $search_array = [];
     $search_array['remote_db'] = $remote_db;
     $search_array['num_criteria'] = $num_criteria;
     $search_array['days'] = $days;
+    $search_array['latestyear'] = $latestyear;
     for ($i = 1; $i <= $num_criteria; $i++) {
       $search_array['criteria'][$i]['search_terms'] = $_SESSION['tripal_pub_import']['criteria'][$i]['search_terms'];
       $search_array['criteria'][$i]['scope'] = $_SESSION['tripal_pub_import']['criteria'][$i]['scope'];
@@ -186,6 +184,10 @@ function tripal_pub_importer_setup_page($action = 'new', $pub_import_id = NULL)
           if (array_key_exists('Publication Dbxref', $pub) and $pub['Publication Dbxref']) {
             $raw_link = l('raw', 'admin/tripal/loaders/pub/raw/' . $pub['Publication Dbxref'], ['attributes' => ['target' => '_blank']]);
           }
+          // indicate those that will be excluded by AGL year filtering parameters
+          if ((array_key_exists('passfilter', $pub)) and ($pub['passfilter'] == 0 )) {
+            $citation = '<span style="text-decoration: line-through;">' . $citation . '</span>';
+          }
           $rows[] = [
             number_format($i),
             $citation,
@@ -279,12 +281,13 @@ function tripal_pub_importer_setup_form($form, &$form_state = NULL, $pub_import_
   $criteria = NULL;
   $remote_db = '';
   $days = '';
+  $latestyear = '';
   $disabled = '';
   $do_contact = '';
   $num_criteria = 1;
   $loader_name = '';
 
-  // if this is an edit the we are pulling an import object from the database
+  // if this is an edit then we are pulling an import object from the database
   if ($action == "edit") {
     $sql = "SELECT * FROM {tripal_pub_import} WHERE pub_import_id = :pub_import_id";
     $importer = db_query($sql, [':pub_import_id' => $pub_import_id])->fetchObject();
@@ -292,6 +295,7 @@ function tripal_pub_importer_setup_form($form, &$form_state = NULL, $pub_import_
     $criteria = unserialize($importer->criteria);
     $remote_db = $criteria['remote_db'];
     $days = $criteria['days'];
+    $latestyear = $criteria['latestyear'];
     $disabled = $criteria['disabled'];
     $do_contact = $criteria['do_contact'];
     $num_criteria = $criteria['num_criteria'];
@@ -302,6 +306,7 @@ function tripal_pub_importer_setup_form($form, &$form_state = NULL, $pub_import_
   if (array_key_exists('tripal_pub_import', $_SESSION)) {
     $remote_db = $_SESSION['tripal_pub_import']['remote_db'];
     $days = $_SESSION['tripal_pub_import']['days'];
+    $latestyear = $_SESSION['tripal_pub_import']['latestyear'];
     $disabled = $_SESSION['tripal_pub_import']['disabled'];
     $do_contact = $_SESSION['tripal_pub_import']['do_contact'];
     $num_criteria = $_SESSION['tripal_pub_import']['num_criteria'];
@@ -317,21 +322,23 @@ function tripal_pub_importer_setup_form($form, &$form_state = NULL, $pub_import_
   // if we are reconstructing the form from a failed validation or ajax callback
   // then use the $form_state['values'] values
   if (array_key_exists('values', $form_state)) {
-    $remote_db = $form_state['values']['remote_db'];
-    $days = $form_state['values']['days'];
-    $disabled = $form_state['values']['disabled'];
-    $do_contact = $form_state['values']['do_contact'];
-    $num_criteria = $form_state['values']['num_criteria'];
-    $loader_name = $form_state['values']['loader_name'];
+    $remote_db = $form_state['values']['remote_db'] ?? null;
+    $days = $form_state['values']['days'] ?? null;
+    $latestyear = $form_state['values']['latestyear'] ?? null;
+    $disabled = $form_state['values']['disabled'] ?? null;
+    $do_contact = $form_state['values']['do_contact'] ?? null;
+    $num_criteria = $form_state['values']['num_criteria'] ?? null;
+    $loader_name = $form_state['values']['loader_name'] ?? null;
   }
   // if we are rebuilding the form after submission (from an ajax call) then
   // the values are in the $form_state['input'] array
   if (array_key_exists('input', $form_state) and !empty($form_state['input'])) {
-    $remote_db = $form_state['input']['remote_db'];
-    $days = $form_state['input']['days'];
-    $disabled = $form_state['input']['disabled'];
-    $do_contact = $form_state['input']['do_contact'];
-    $loader_name = $form_state['input']['loader_name'];
+    $remote_db = $form_state['input']['remote_db'] ?? null;
+    $days = $form_state['input']['days'] ?? null;
+    $latestyear = $form_state['input']['latestyear'] ?? null;
+    $disabled = $form_state['input']['disabled'] ?? null;
+    $do_contact = $form_state['input']['do_contact'] ?? null;
+    $loader_name = $form_state['input']['loader_name'] ?? null;
 
     // because the num_criteria is a value and not a visible or hidden form
     // element it is not part of the ['input'] array, so we need to get it from the form
@@ -383,11 +390,7 @@ function tripal_pub_importer_setup_form($form, &$form_state = NULL, $pub_import_
   if (!$remote_db) {
     $remote_db = 'PMID';
   }
-  /**
-   * Removing AGL option until we get it fixed
-   * spf 6/22/2018
-   */
-  unset($remote_dbs['AGL']);
+
   $form['themed_element']['remote_db'] = [
     '#title' => t('Source'),
     '#type' => 'select',
@@ -407,6 +410,14 @@ function tripal_pub_importer_setup_form($form, &$form_state = NULL, $pub_import_
     '#default_value' => $days,
     '#size' => 5,
   ];
+  // AGL only, this field will be removed for the pubmed loader
+  $form['themed_element']['latestyear'] = [
+    '#type' => 'textfield',
+    '#title' => t('Latest year of publication'),
+    '#description' => t('Filter returned publications for those that have been published no later than this year.'),
+    '#default_value' => $latestyear,
+    '#size' => 5,
+  ];
   $form['themed_element']['disabled'] = [
     '#type' => 'checkbox',
     '#title' => t('Disabled'),
@@ -567,10 +578,14 @@ function tripal_pub_importer_setup_add_criteria_fields(&$form, &$form_state, $nu
 
     // if we have criteria supplied from the database then use that as the initial defaults
     if ($criteria) {
-      $search_terms = $criteria['criteria'][$i]['search_terms'];
-      $scope = $criteria['criteria'][$i]['scope'];
-      $is_phrase = $criteria['criteria'][$i]['is_phrase'];
-      $operation = $criteria['criteria'][$i]['operation'];
+      if (array_key_exists('criteria', $criteria)) {
+        if (array_key_exists($i, $criteria['criteria'])) {
+          $search_terms = $criteria['criteria'][$i]['search_terms'];
+          $scope = $criteria['criteria'][$i]['scope'];
+          $is_phrase = $criteria['criteria'][$i]['is_phrase'];
+          $operation = $criteria['criteria'][$i]['operation'];
+        }
+      }
     }
 
     // if the criteria comes from the session
@@ -582,12 +597,12 @@ function tripal_pub_importer_setup_add_criteria_fields(&$form, &$form_state, $nu
     }
 
     // If the form_state has variables then use those.  This happens when an error occurs on the form or the
-    // form is resbumitted using AJAX
+    // form is resubmitted using AJAX
     if (array_key_exists('values', $form_state)) {
-      $search_terms = $form_state['values']["search_terms-$i"];
-      $scope = $form_state['values']["scope-$i"];
-      $is_phrase = $form_state['values']["is_phrase-$i"];
-      $operation = $form_state['values']["operation-$i"];
+      $search_terms = $form_state['values']["search_terms-$i"] ?? null;
+      $scope = $form_state['values']["scope-$i"] ?? null;
+      $is_phrase = $form_state['values']["is_phrase-$i"] ?? null;
+      $operation = $form_state['values']["operation-$i"] ?? null;
     }
     $form['themed_element']['criteria'][$i]["scope-$i"] = [
       '#type' => 'select',
@@ -707,6 +722,7 @@ function tripal_pub_importer_setup_form_validate($form, &$form_state) {
   $num_criteria = $form_state['values']['num_criteria'];
   $remote_db = $form_state['values']["remote_db"];
   $days = trim($form_state['values']["days"]);
+  $latestyear = trim($form_state['values']["latestyear"]);
   $disabled = $form_state['values']["disabled"];
   $do_contact = $form_state['values']["do_contact"];
   $loader_name = trim($form_state['values']["loader_name"]);
@@ -732,6 +748,10 @@ function tripal_pub_importer_setup_form_validate($form, &$form_state) {
     form_set_error("days", "Please enter a numeric, non decimal value, for the number of days.");
     $_SESSION['tripal_pub_import']['perform_search'] = 0;
   }
+  if ($latestyear and !is_numeric($latestyear) or preg_match('/\./', $latestyear)) {
+    form_set_error("latestyear", "Please enter a numeric, non decimal value, for latestyear.");
+    $_SESSION['tripal_pub_import']['perform_search'] = 0;
+  }
   // allow the selected remote database to validate any changes to the form if needed
   $callback = "tripal_pub_remote_validate_form_$remote_db";
   $form = call_user_func($callback, $form, $form_state);
@@ -748,6 +768,7 @@ function tripal_pub_importer_setup_form_submit($form, &$form_state) {
   $num_criteria = $form_state['values']['num_criteria'];
   $remote_db = $form_state['values']["remote_db"];
   $days = trim($form_state['values']["days"]);
+  $latestyear = trim($form_state['values']["latestyear"]);
   $loader_name = trim($form_state['values']["loader_name"]);
   $disabled = $form_state['values']["disabled"];
   $do_contact = $form_state['values']["do_contact"];
@@ -755,6 +776,7 @@ function tripal_pub_importer_setup_form_submit($form, &$form_state) {
   // set the session variables
   $_SESSION['tripal_pub_import']['remote_db'] = $remote_db;
   $_SESSION['tripal_pub_import']['days'] = $days;
+  $_SESSION['tripal_pub_import']['latestyear'] = $latestyear;
   $_SESSION['tripal_pub_import']['num_criteria'] = $num_criteria;
   $_SESSION['tripal_pub_import']['loader_name'] = $loader_name;
   $_SESSION['tripal_pub_import']['disabled'] = $disabled;
@@ -861,6 +883,10 @@ function theme_tripal_pub_importer_setup_form_elements($variables) {
   $markup .= '</div>';
   $markup .= '<div id="pub-search-form-row1" style="clear:both">';
   $markup .= '  <div id="pub-search-form-row1-col1">' . drupal_render($form['days']) . '</div>';
+  // latest year field is used only for AGL importer
+  if ($variables['form']['remote_db']['#value'] == 'AGL') {
+    $markup .= '  <div id="pub-search-form-row1-col2">' . drupal_render($form['latestyear']) . '</div>';
+  }
   $markup .= '</div>';
   $markup .= '<div id="pub-search-form-row2">' . drupal_render($form['disabled']) . '</div>';
   $markup .= '<div id="pub-search-form-row3">' . drupal_render($form['do_contact']) . '</div>';
@@ -996,46 +1022,53 @@ function tripal_pub_add_publications($pubs, $do_contact, $update = FALSE, $job =
     $memory = number_format(memory_get_usage()) . " bytes";
     print "Processing $i of $total_pubs. Memory usage: $memory.\r";
 
-    // add the publication to Chado
-    $action = '';
-    $pub_id = tripal_pub_add_publication($pub, $action, $do_contact, $update, $job);
-    if ($pub_id) {
-      // add the publication cross reference (e.g. to PubMed)
-      if ($pub_id and $pub['Publication Dbxref']) {
-        $dbxref = [];
-        if (preg_match('/^(.*?):(.*?)$/', trim($pub['Publication Dbxref']), $matches)) {
-          $dbxref['db_name'] = $matches[1];
-          $dbxref['accession'] = $matches[2];
-        }
-        else {
-          tripal_report_error($message_type, TRIPAL_ERROR,
-            'Unable to extract the dbxref to be associated with the publication (pub ID=@pub_id) from @dbxref. This reference should be [database-name]:[accession]',
-            [
-              '@pub_id' => $pub_id,
-              '@dbxref' => $pub['Publication Dbxref'],
-              $message_opts,
-            ]
-          );
+    // implementation of year limits for AGL uses a 'passfilter' flag
+    if ((!array_key_exists('passfilter', $pub)) or ($pub['passfilter'] == 1 )) {
+
+      // add the publication to Chado
+      $action = '';
+      $pub_id = tripal_pub_add_publication($pub, $action, $do_contact, $update, $job);
+      // $pub_id will be null if publication already existed
+      if ($pub_id) {
+        // add the publication cross reference (e.g. to PubMed)
+        if ($pub_id and $pub['Publication Dbxref']) {
+          $dbxref = [];
+          if (preg_match('/^(.*?):(.*?)$/', trim($pub['Publication Dbxref']), $matches)) {
+            $dbxref['db_name'] = $matches[1];
+            $dbxref['accession'] = $matches[2];
+          }
+          else {
+            tripal_report_error($message_type, TRIPAL_ERROR,
+              'Unable to extract the dbxref to be associated with the publication (pub ID=@pub_id) from @dbxref. This reference should be [database-name]:[accession]',
+              [
+                '@pub_id' => $pub_id,
+                '@dbxref' => $pub['Publication Dbxref'],
+                $message_opts,
+              ]
+            );
+          }
+          $pub_dbxref = tripal_associate_dbxref('pub', $pub_id, $dbxref);
         }
-        $pub_dbxref = tripal_associate_dbxref('pub', $pub_id, $dbxref);
+        $pub['pub_id'] = $pub_id;
       }
-      $pub['pub_id'] = $pub_id;
-    }
 
-    switch ($action) {
-      case 'error':
-        $report['error'][] = $pub['Citation'];
-        break;
-      case 'inserted':
-        $report['inserted'][] = $pub['Citation'];
-        break;
-      case 'updated':
-        $report['updated'][] = $pub['Citation'];
-        break;
-      case 'skipped':
-        $report['skipped'][] = $pub['Citation'];
-        break;
+      switch ($action) {
+        case 'error':
+          $report['error'][] = $pub['Citation'];
+          break;
+        case 'inserted':
+          $report['inserted'][] = $pub['Citation'];
+          break;
+        case 'updated':
+          $report['updated'][] = $pub['Citation'];
+          break;
+        case 'skipped':
+          $report['skipped'][] = $pub['Citation'];
+          break;
+      }
     }
+    // else pub failed AGL year filter
+    else { $report['skipped'][] = $pub['Citation']; }
     $i++;
   }
   return $report;
@@ -1081,7 +1114,6 @@ function tripal_pub_add_publications($pubs, $do_contact, $update = FALSE, $job =
  */
 function tripal_pub_add_publication($pub_details, &$action, $do_contact = FALSE, $update_if_exists = FALSE, $job = NULL) {
   $pub_id = 0;
-
   // These are options for the tripal_report_error function. We do not
   // want to log messages to the watchdog except for errors and to the job and 
   // to the terminal
@@ -1250,6 +1282,11 @@ function tripal_pub_add_publication($pub_details, &$action, $do_contact = FALSE,
       continue;
     }
 
+    // Filtering flag for AGL, not a property to add here
+    if ($key == 'passfilter') {
+      continue;
+    }
+
     // Since we're not updating the 'Publication Dbxref' on an update
     // skip this property.
     if ($update_if_exists and $key == 'Publication Dbxref') {