فهرست منبع

Fixed several bugs in pub module

spficklin 12 سال پیش
والد
کامیت
bde2b2c996

+ 32 - 44
tripal_pub/api/tripal_pub.api.inc

@@ -61,7 +61,6 @@ function tripal_pub_get_remote_search_results($remote_db, $search_array,
   if (is_int($page) and $page > 0) {
     $_GET['page'] = $page;
   }  
-  
   // now call the callback function to get the results
   $callback = "tripal_pub_remote_search_$remote_db";
   $pubs = array();
@@ -105,7 +104,7 @@ function tripal_pub_get_raw_data($dbxref) {
 /*
  * @ingroup tripal_pub_api
  */
-function tripal_pub_update_publications($do_contact = FALSE, $dbxref = NULL) {
+function tripal_pub_update_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {
   
   // get a persistent connection
   $connection = tripal_db_persistent_chado();
@@ -121,8 +120,7 @@ function tripal_pub_update_publications($do_contact = FALSE, $dbxref = NULL) {
            "insertions/updates is rolled back and will not be found in the database\n\n";
   }
   
-  // get a list of all publications that have 
-  // supported databases
+  // get a list of all publications by their Dbxrefs that have supported databases
   $sql = "
     SELECT DB.name as db_name, DBX.accession
     FROM pub P
@@ -138,8 +136,12 @@ function tripal_pub_update_publications($do_contact = FALSE, $dbxref = NULL) {
     $args[] = $accession;
     $args[] = $dbname;    
   }
+  elseif ($db) {
+    $sql .= " WHERE DB.name = '%s' ";
+    $args[] = $db;
+  }
   $sql .= "ORDER BY DB.name, P.pub_id";
-  $results = chado_query($sql, $args);
+  $results = chado_query($sql, $args);  
   
   $num_to_retrieve = 100;
   $i = 0;                 // count the number of IDs. When we hit $num_to_retrieve we'll do the query
@@ -151,51 +153,28 @@ function tripal_pub_update_publications($do_contact = FALSE, $dbxref = NULL) {
   while ($pub = db_fetch_object($results)) {    
     $accession = $pub->accession;
     $remote_db = $pub->db_name;
-    
+        
     // here we need to only update publications for databases we support
     $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
     if(!in_array($remote_db, $supported_dbs)) {
       continue;
-    }
-    
-    // if we're switching databases then reset the search array    
-    if($remote_db != $curr_db) {
-      // if we had a previous DB then do the update.
-      if ($curr_db) {
-        $search['num_criteria'] = $i - 1;
-        $pubs = tripal_pub_get_remote_search_results($remote_db, $search, $i, 0);
-        tripal_pub_add_publications($pubs, $do_contact, TRUE); 
-      }
-      $curr_db = $remote_db;      
-      $search = array(
-        'remote_db' => $remote_db,
-        'criteria' => array(),
-      );
-      $ids = array();
-      $i = 0;
-    }
-    
-    // if we've hit the maximum number to retrieve then do the search
-    if($i == $num_to_retrieve) {      
-      $search['num_criteria'] = $i - 1;
-      $pubs = tripal_pub_get_remote_search_results($remote_db, $search, $i, 0);      
-      tripal_pub_add_publications($pubs, $do_contact, TRUE);
-      $search['criteria'] = array();
-      $i = 0;
-    }
-    
-    // add each accession to the search criteria
-    $search['criteria'][] = array(
-      'search_terms' => $accession,
-      'scope' => 'id',
-      'operation' => 'OR'
+    }    
+    $search = array(
+      'num_criteria' => 1,
+      'remote_db' => $remote_db,
+      'criteria' => array(
+        '1' => array(
+          'search_terms' => "$remote_db:$accession",
+          'scope' => 'id',
+          'operation' => '',
+          'is_phrase' => 0,
+        ),
+      ),
     );
+    $pubs = tripal_pub_get_remote_search_results($remote_db, $search, 1, 0);
+    tripal_pub_add_publications($pubs, $do_contact, TRUE);  
     $i++;   
   }
-  // now update any remaining in the search criteria array
-  $search['num_criteria'] = $i - 1;
-  $pubs = tripal_pub_get_remote_search_results($remote_db, $search, $i, 0);
-  tripal_pub_add_publications($pubs, $do_contact, TRUE);
   
   // transaction is complete
   tripal_db_commit_transaction();
@@ -526,7 +505,16 @@ function tripal_pub_add_publication($pub_details, $do_contact = FALSE, $update =
   }
   
   // get the publication type (use the first publication type, any others will get stored as properties)
-  $pub_type = tripal_cv_get_cvterm_by_name($pub_details['Publication Type'][0], NULL, 'tripal_pub');
+  if(is_array($pub_details['Publication Type'])) {
+    $pub_type = tripal_cv_get_cvterm_by_name($pub_details['Publication Type'][0], NULL, 'tripal_pub');
+  } 
+  elseif($pub_details['Publication Type']) {
+    $pub_type = tripal_cv_get_cvterm_by_name($pub_details['Publication Type'], NULL, 'tripal_pub');
+  }
+  else {
+    watchdog('tripal_pub', "The Publication Type is a required property but is missing", array(), WATCHDOG_ERROR);
+    return FALSE;
+  }
   if (!$pub_type) {
     watchdog('tripal_pub', "Cannot find publication type: '%type'", 
       array('%type' => $pub_details['Publication Type'][0]), WATCHDOG_ERROR);

+ 27 - 8
tripal_pub/includes/importers/AGL.inc

@@ -15,10 +15,26 @@ function tripal_pub_remote_alter_form_AGL(&$form, $form_state) {
  */
 function tripal_pub_remote_validate_form_AGL(&$form, $form_state) {
+  // Validates the AGL-specific values of the importer form:
+  //  - 'days', when provided, must be a four digit year
+  //  - every criterion whose scope is 'id' must look like 'AGL:<digits>'
+  //  - at most one criterion with scope 'id' may be supplied
+  // Problems are reported through form_set_error(); nothing is returned.
   $days =  trim($form_state['values']["days"]);
+  $num_criteria = $form['num_criteria']['#default_value'];
   
   if ($days and !preg_match('/^\d\d\d\d$/', $days)) {
     form_set_error("days", "Please enter a four digit year.");
   }
+  
+  $num_ids = 0;
+  for ($i = 1; $i <= $num_criteria; $i++) {
+    $search_terms =  trim($form_state['values']["search_terms-$i"]);
+    $scope =  $form_state['values']["scope-$i"];
+    
+    // only accession ('id') criteria need AGL-specific checks
+    if ($scope != 'id') {
+      continue;
+    }
+    if (!preg_match('/^AGL:\d+$/', $search_terms)) {
+      form_set_error("search_terms-$i", "The AGL accession must be a numeric value, prefixed with 'AGL:' (e.g. AGL:3890740).");
+    }
+    
+    // count the accession rows and flag only the surplus accession rows
+    // themselves, so that later rows with a different scope are not blamed
+    $num_ids++;
+    if ($num_ids > 1) {
+      form_set_error("search_terms-$i", "Unfortunately, the AGL importer can only support a single accession at a time. Please remove the others.");
+    }
+  }
 }
 /**
  *
@@ -147,20 +163,20 @@ function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $pager_id
     }
     elseif ($scope == 'id') {
       if ($first_id) {
-        $id .= "(" . preg_replace('/AGL:([^\s]*)/', 'id=($1)', $search_terms) . ") ";
+        $id .= "(" . preg_replace('/AGL:([^\s]*)/', '$1', $search_terms) . ") ";
         $first_id = 0; 
       }
       else {
-        $id .= "$op (" . preg_replace('/AGL:([^\s]*)/', 'id=($1)', $search_terms) . ") ";
+        $id .= "$op (" . preg_replace('/AGL:([^\s]*)/', '$1', $search_terms) . ") ";
       }
     } 
     elseif ($scope == 'negate_id') {
       if ($first_negate_id) {
-        $negate_id .= "(" . preg_replace('/AGL:([^\s]*)/', 'id=($1)', $search_terms) . ") ";
+        $negate_id .= "(" . preg_replace('/AGL:([^\s]*)/', '$1', $search_terms) . ") ";
         $first_negate_id = 0; 
       }
       else {
-        $negate_id .= "$op (" . preg_replace('/AGL:([^\s]*)/', 'id=($1)', $search_terms) . ") ";
+        $negate_id .= "$op (" . preg_replace('/AGL:([^\s]*)/', '$1', $search_terms) . ") ";
       }
     } 
     elseif ($scope == 'any'){
@@ -193,7 +209,7 @@ function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $pager_id
   $negate_title_done = 0;
   $negate_id_done = 0;
   $negate_any_done = 0;
-  for ($i = 1; $i < count($order) ; $i++) {
+  for ($i = 0; $i < count($order) ; $i++) {
     if ($order[$i]['scope'] == 'abstract' and !$abstract_done) {
       $op = $order[$i]['op'];
       $ccl .= "$op abstract=($abstract) ";
@@ -212,7 +228,7 @@ function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $pager_id
       $ccl .= "not author=($negate_author) ";
       $negate_author_done = 1;
     }
-    if ($order[$i]['scope'] == 'id' and !$id_done) {
+    if ($order[$i]['scope'] == 'id' and !$id_done) {      
       $op = $order[$i]['op'];
       $ccl .= "$op id=($id) ";
       $id_done = 1;
@@ -245,7 +261,7 @@ function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $pager_id
   if ($days) {
     $ccl .= "and year=($days)";
   }
-  
+
   // remove any preceding 'and' or 'or'
   $ccl = preg_replace('/^\s*(and|or)/', '', $ccl);
     
@@ -295,7 +311,7 @@ function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $pager_id
   // close the connection
   unset($_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection']);
   yaz_close($yazc);  
-    
+  
   return $pubs;
 }
 /*
@@ -382,6 +398,9 @@ function tripal_pub_AGL_range($search_array, $start = 0, $limit = 10) {
 function tripal_pub_AGL_parse_pubxml($pub_xml) {
   $pub = array();
   
+  // publications from the NAL Article Citation Database are all journal articles
+  $pub['Publication Type'][] = 'Journal Article';
+  
   if (!$pub_xml) {
     return $pub;
   }

+ 18 - 5
tripal_pub/includes/importers/PMID.inc

@@ -22,7 +22,15 @@ function tripal_pub_remote_alter_form_PMID(&$form, $form_state) {
  *  
  */
 function tripal_pub_remote_validate_form_PMID(&$form, $form_state) {
-  // no additional validation needed  
+  $num_criteria = $form['num_criteria']['#default_value'];
+    
+  for ($i = 1; $i <= $num_criteria; $i++) {            
+    $search_terms =  trim($form_state['values']["search_terms-$i"]);
+    $scope =  $form_state['values']["scope-$i"];
+    if ($scope == 'id' and !preg_match('/^PMID:\d+$/', $search_terms)) {
+      form_set_error("search_terms-$i", "The PubMed accession must be a numeric value, prefixed with 'PMID:' (e.g. PMID:23024789).");  
+    }       
+  }  
 }
 /**
  *
@@ -61,8 +69,8 @@ function tripal_pub_remote_search_PMID($search_array, $num_to_retrieve, $pager_i
       $search_str .= $search_terms . '[Title/Abstract]';
     }
     elseif ($scope == 'id') {
-      $search_terms = preg_replace('/PMID:([^\s]*)/', 'id=($1)', $search_terms);
-      $search_str .= '(' . $search_terms . ')[Uid]';      
+      $search_terms = preg_replace('/PMID:([^\s]*)/', '$1', $search_terms);
+      $search_str .=  $search_terms . '[Uid]';      
     }
     else {
       $search_str .= $search_terms;
@@ -80,6 +88,7 @@ function tripal_pub_remote_search_PMID($search_array, $num_to_retrieve, $pager_i
   $search_array['search_string'] = $search_str;  
   //dpm($search_array);  
   
+  unset($_SESSION['tripal_pub_PMID_query']);
   // we want to get the list of pubs using the search terms but using a Drupal style pager
   $pubs = tripal_pager_callback('tripal_pub_PMID_range',  $num_to_retrieve, $pager_id, 
     'tripal_pub_PMID_count', $search_array);
@@ -134,8 +143,8 @@ function tripal_pub_PMID_range($search_array, $start = 0, $limit = 10) {
   }
 
   // now get the list of PMIDs from the previous search  
-  $pmids_txt = tripal_pub_PMID_fetch($query_key, $web_env, 'uilist', 'text', $start, $limit);  
-  
+  $pmids_txt = tripal_pub_PMID_fetch($query_key, $web_env, 'uilist', 'text', $start, $limit);
+ 
   // iterate through each PMID and get the publication record. This requires a new search and new fetch
   $pmids = explode("\n", trim($pmids_txt));
   $pubs = array();
@@ -283,6 +292,10 @@ function tripal_pub_PMID_parse_pubxml($pub_xml) {
     if ($xml->nodeType == XMLReader::ELEMENT) {
       
       switch ($element) {
+        case 'ERROR':
+          $xml->read(); // get the value for this element
+          watchdog('tripal_pub', "Error: %err", array('%err' => $xml->value), WATCHDOG_ERROR);
+          break;
         case 'PMID':
           // there are multiple places where a PMID is present in the XML and
           // since this code does not descend into every branch of the XML tree

+ 4 - 0
tripal_pub/includes/pub_importers.inc

@@ -242,6 +242,10 @@ function tripal_pub_importer_setup_form(&$form_state = NULL, $pub_import_id = NU
   foreach ($dbs as $index => $db) {
     $remote_dbs[$db->name] = $db->description;
   };
+  // use PubMed as the default
+  if (!$remote_db) {
+    $remote_db = 'PMID';
+  }
   $form['remote_db'] = array(
     '#title' => t('Remote Database'),
     '#type' => 'select',

+ 1 - 1
tripal_pub/tpub.obo

@@ -1356,7 +1356,7 @@ def: Internaltional Standard Book Number
 relationship: part_of TPUB:0000037 ! Publication Details
 
 [Term]
-id: TPUB:0000247
+id: TPUB:0000248
 name: Notes
 def: Information about the publication that is not classified using another term.
 relationship: part_of TPUB:0000037 ! Publication Details

+ 7 - 5
tripal_pub/tripal_pub.drush.inc

@@ -37,12 +37,14 @@ function tripal_pub_drush_command() {
     'description' => dt('Updates publication information for publications with a supported database cross-reference.'),
     'options' => array(
       'create_contacts' => dt('provide this option to create or update contacts for authors. By default contacts are not created or updated.'),
-      'dbxref' => dt('An accession number for a publication from a remote database (e.g. PMID:23582642)')
+      'dbxref' => dt('An accession number for a publication from a remote database (e.g. PMID:23582642)'),
+      'db' => dt('The database name (e.g. PMID or AGL)'),
     ),
     'examples' => array(
       'Standard example' => 'drush tripal-pubs-update',
       'Create contacts during update' => 'drush tripal-pubs-update --create_contacts=1',
-      'Update a single record' => 'drush tripal-pubs-update --dbxref=PMID:23582642'
+      'Update a single record' => 'drush tripal-pubs-update --dbxref=PMID:23582642',
+      'Update all records for a single database' => 'drush tripal-pubs-update --db=PMID'
     ),
     'aliases' => array('tpubs-update'),
   );
@@ -64,7 +66,7 @@ function drush_tripal_pub_tripal_pubs_import() {
 function drush_tripal_pub_tripal_pubs_update() {
   $create_contacts = drush_get_option('create_contacts');
   $dbxref = drush_get_option('dbxref');
-  
-  
-  tripal_pub_update_publications($create_contacts, $dbxref);
+  $db = drush_get_option('db');
+    
+  tripal_pub_update_publications($create_contacts, $dbxref, $db);
 }