Browse Source

Fixed bugs with importing

spficklin 12 years ago
parent
commit
60c4c07713

+ 47 - 12
tripal_pub/api/tripal_pub.api.inc

@@ -73,6 +73,37 @@ function tripal_pub_get_remote_search_results($remote_db, $search_array,
   
   return $pubs;  
 }
+/*
+ * Fetches the raw record for a single publication from a remote database
+ * and returns it as HTML suitable for a page callback.
+ *
+ * @param $dbxref
+ *   The publication cross reference in the form DB:ACCESSION.  Everything
+ *   before the first colon is taken as the remote database name and the
+ *   remainder as the accession.
+ *
+ * @return
+ *   An HTML string.  On success this is a textarea containing the escaped
+ *   XML of the first matching record.  Otherwise it is a short message
+ *   explaining that the dbxref is malformed, the database is not supported
+ *   or no record was returned.
+ *
+ * @ingroup tripal_pub_api
+ */
+function tripal_pub_get_raw_data($dbxref) {
+
+  // the lazy first group means the split happens on the first colon only
+  if (!preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
+    return 'Invalid DB xref';
+  }
+  $remote_db = $matches[1];
+  $accession = $matches[2];
+
+  // check that the database is supported.  The comparison is strict so that
+  // a database name can never match a non-string entry through type juggling
+  $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
+  if (!in_array($remote_db, $supported_dbs, TRUE)) {
+    // $dbxref comes straight from the request so it must be escaped before
+    // it is placed in the page
+    return 'Unsupported database: ' . check_plain($dbxref);
+  }
+
+  // build the search criteria: a single lookup of the accession by id
+  $search = array(
+    'remote_db' => $remote_db,
+    'criteria' => array(
+      array(
+        'search_terms' => $accession,
+        'scope' => 'id',
+      ),
+    ),
+  );
+  // NOTE(review): num_criteria is 0 although one criterion is supplied.
+  // Presumably it is the zero-based index of the last criterion; confirm
+  // against the remote search implementation.
+  $search['num_criteria'] = 0;
+
+  // retrieve at most one record using pager 0
+  $pubs = tripal_pub_get_remote_search_results($remote_db, $search, 1, 0);
+
+  // the remote search may return nothing (unknown accession, failed query)
+  if (!is_array($pubs) or !isset($pubs[0]['xml'])) {
+    return 'No record found for: ' . check_plain($dbxref);
+  }
+
+  // escape the XML so that it is displayed verbatim inside the textarea and
+  // cannot close the element or inject markup into the page
+  return '<textarea cols=80 rows=20>' . check_plain($pubs[0]['xml']) . '</textarea>';
+}
 /*
  * @ingroup tripal_pub_api
  */
@@ -180,7 +211,7 @@ function tripal_pub_update_publications($do_contact = FALSE) {
 /*
  * @ingroup tripal_pub_api
  */
-function tripal_pub_import_publications() {
+function tripal_pub_import_publications($pub_import_id = NULL) {
   $num_to_retrieve = 100;
   $pager_id = 0;
   $page = 0;
@@ -198,12 +229,16 @@ function tripal_pub_import_publications() {
      print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
            "If the load fails or is terminated prematurely then the entire set of \n" .
            "insertions/updates is rolled back and will not be found in the database\n\n";
-  }
-  
+  }  
   
   // get all of the loaders
-  $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0";
-  $results = db_query($sql);
+  $args = array();
+  $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
+  if ($pub_import_id) {
+    $sql .= " AND pub_import_id = %d";
+    $args[] = $pub_import_id;
+  }
+  $results = db_query($sql, $args);
   $do_contact = FALSE;
   while ($import = db_fetch_object($results)) {
     print "Importing: " . $import->name . "\n";
@@ -495,14 +530,14 @@ function tripal_pub_add_publication($pub_details, $do_contact = FALSE, $update =
     }
     // if the publication does not exist then create it.      
     $values = array(
-      'title' => $pub_details['Title'],
-      'volume' => $pub_details['Volume'],
+      'title'       => $pub_details['Title'],
+      'volume'      => $pub_details['Volume'],
       'series_name' => $pub_details['Journal Name'],
-      'issue' => $pub_details['Issue'],
-      'pyear' => $pub_details['Year'],
-      'pages' => $pub_details['Pages'],
-      'uniquename' => $pub_details['Citation'],
-      'type_id' => $pub_type->cvterm_id,
+      'issue'       => $pub_details['Issue'],
+      'pyear'       => $pub_details['Year'],
+      'pages'       => $pub_details['Pages'],
+      'uniquename'  => $pub_details['Citation'],
+      'type_id'     => $pub_type->cvterm_id,
     );
     $options = array('statement_name' => 'ins_pub_tivoseispypaunty');
     $pub = tripal_core_chado_insert('pub', $values, $options);     

+ 27 - 7
tripal_pub/includes/pub_importers.inc

@@ -76,7 +76,7 @@ function tripal_pub_importer_setup($action = 'new', $pub_import_id = NULL) {
   if ($_SESSION['tripal_pub_search']['perform_search']) {
     // get the list of publications from the remote database using the search criteria.  
     $pubs = tripal_pub_get_remote_search_results($remote_db, $search_array, $limit, $pager_id);
-  
+    
     // generate the pager
     $total_pages = $pager_total[$pager_id];
     $total_items = $pager_total_items[$pager_id];
@@ -88,15 +88,23 @@ function tripal_pub_importer_setup($action = 'new', $pub_import_id = NULL) {
     $i = $page * $limit + 1;
     if (count($pubs) > 0) {
       foreach ($pubs as $pub) {
-        $rows[] = array(number_format($i), $pub['Citation']);
+        $raw_link = '';
+        if($pub['Publication Dbxref']) {
+          $raw_link = l('raw', 'admin/tripal/tripal_pub/import/raw/' . $pub['Publication Dbxref'], array('attributes' => array('target' => '_blank')));
+        }
+        $rows[] = array(
+          number_format($i), 
+          $pub['Citation'], 
+          $raw_link,
+        );        
         $i++;
       }
     }
-    $headers = array('', 'Citation');
+    $headers = array('', 'Publication', '');
     $table = theme('table', $headers, $rows);   
   
     // join all to form the results
-    $output .= "<br><p><b>Found " . number_format($total_items) .  
+    $output .= "<br>Search String: $search_str<br><p><b>Found " . number_format($total_items) .  
       ". Page " . ($page + 1) . " of $total_pages. " .
       " Results</b></br>" . $table . '</p>' . $pager;    
   }
@@ -334,11 +342,17 @@ function tripal_pub_importer_setup_form(&$form_state = NULL, $pub_import_id = NU
     '#type'         => 'submit',
     '#value'        => t('Save Importer'),
   );
+  /*
+  $form['import'] = array(
+    '#type'         => 'submit',
+    '#value'        => t('Save & Import Now'),
+  );*/
   $form['delete'] = array(
     '#type'         => 'submit',
     '#value'        => t('Delete Importer'),
   );
 
+
   return $form;
 }
 
@@ -401,7 +415,8 @@ function tripal_pub_importer_setup_form_submit($form, &$form_state) {
   if ($form_state['values']['op'] == 'Test Importer') {
     $_SESSION['tripal_pub_search']['perform_search'] = 1;
   }
-  if ($form_state['values']['op'] == 'Save Importer') {    
+  if ($form_state['values']['op'] == 'Save Importer' or 
+      $form_state['values']['op'] == 'Save & Import Now') {    
     $record = array(
       'name' => $loader_name,
       'criteria' => serialize($_SESSION['tripal_pub_search']),
@@ -428,7 +443,12 @@ function tripal_pub_importer_setup_form_submit($form, &$form_state) {
       // do the insert        
       if(drupal_write_record('tripal_pub_import', $record)){
         unset($_SESSION['tripal_pub_search']);
-        drupal_set_message('Publication import settings saved.');
+        drupal_set_message('Publication import settings saved.');        
+        // if the user wants to do the import now then do it (may time out
+        // for long jobs)
+        if ($form_state['values']['op'] == 'Save & Import Now') {
+          tripal_pub_import_publications($record['pub_import_id']);
+        }
         drupal_goto('admin/tripal/tripal_pub/import_list');
       }
       else {
@@ -446,7 +466,7 @@ function tripal_pub_importer_setup_form_submit($form, &$form_state) {
     else {
       drupal_set_message('Could not delete publication importer.', 'error');
     }
-  }  
+  } 
 }
 
 /*

+ 8 - 7
tripal_pub/includes/pub_sync.inc

@@ -64,13 +64,14 @@ function tripal_pub_sync_pub($pub) {
   global $user;
   
   $new_node = new stdClass();
-  $new_node->pub_id = $pub->pub_id;
-  $new_node->type = 'chado_pub';
-  $new_node->uid = $user->uid;
-  $new_node->title = $pub->title;
-  $new_node->pyear = $pub->pyear;
-  $new_node->uniquename = $pub->uniquename;
-  $new_node->type_id = $pub->type_id;
+  $new_node->pub_id      = $pub->pub_id;
+  $new_node->type        = 'chado_pub';
+  $new_node->uid         = $user->uid;
+  $new_node->title       = $pub->title;
+  $new_node->pyear       = $pub->pyear;
+  $new_node->uniquename  = $pub->uniquename;
+  $new_node->type_id     = $pub->type_id;
+  $new_node->series_name = $pub->series_name;
 
   node_validate($new_node);
   $errors = form_get_errors();

+ 33 - 19
tripal_pub/includes/pubmed.inc

@@ -28,27 +28,27 @@ function tripal_pub_remote_search_PMID($search_array, $num_to_retrieve, $pager_i
     }
     
     $search_str .= '(';
-    $search_str .= implode("+", preg_split('/\s+/', trim($search_terms)));
+    
     if($scope == 'title') {
-      $search_str .= '[Title]';
+      $search_str .= '"' . implode("+", preg_split('/\s+/', trim($search_terms))) . '"[Title]';
     }    
     elseif($scope == 'author') {
-      $search_str .= '[Author]';
+      $search_str .= '"' . implode("+", preg_split('/\s+/', trim($search_terms))) . '"[Author]';
     }    
     elseif($scope == 'abstract') {
-      $search_str .= '[Title/Abstract]';
+      $search_str .= '"' . implode("+", preg_split('/\s+/', trim($search_terms))) . '"[Title/Abstract]';
     }
     elseif($scope == 'id') {
-      $search_str .= '[Uid]';
-    }
+      $search_str .= '' . implode("+", preg_split('/\s+/', trim($search_terms))) . '[Uid]';      
+    }    
     $search_str .= ')'; 
   }  
   $search_array['limit'] = $num_to_retrieve;
-  $search_array['search_terms'] = $search_str;
-
+  $search_array['search_string'] = $search_str;
+    
   // we want to get the list of pubs using the search terms but using a Drupal style pager
-  $pubs = tripal_pager_callback('tripal_pub_PMID_range',  
-    $num_to_retrieve, $pager_id, 'tripal_pub_PMID_count', $search_array);
+  $pubs = tripal_pager_callback('tripal_pub_PMID_range',  $num_to_retrieve, $pager_id, 
+    'tripal_pub_PMID_count', $search_array);
  
   return $pubs;
 }
@@ -59,7 +59,7 @@ function tripal_pub_remote_search_PMID($search_array, $num_to_retrieve, $pager_i
  * the dataset to be paged.
  */
 function tripal_pub_PMID_count($search_array) {
-  $terms = $search_array['search_terms'];
+  $terms = $search_array['search_string'];
   $days  = $search_array['days'];
   $limit = $search_array['limit'];
   
@@ -67,7 +67,7 @@ function tripal_pub_PMID_count($search_array) {
   $_SESSION['tripal_pub_PMID_query'][$terms]['Count'] = $results['Count'];
   $_SESSION['tripal_pub_PMID_query'][$terms]['WebEnv'] = $results['WebEnv'];
   $_SESSION['tripal_pub_PMID_query'][$terms]['QueryKey'] = $results['QueryKey'];
-  
+   
   return $results['Count'];
 
 }
@@ -78,11 +78,14 @@ function tripal_pub_PMID_count($search_array) {
  * within the specified range
  */
 function tripal_pub_PMID_range($search_array, $start = 0, $limit = 10) {
-  $terms = $search_array['search_terms'];
+  $terms = $search_array['search_string'];
   $days  = $search_array['days'];
   $limit = $search_array['limit'];
   
   $count = $_SESSION['tripal_pub_PMID_query'][$terms]['Count'];
+  if ($count == 0) {
+    return array();
+  }
     
   // get the query_key and the web_env from the previous count query.
   $query_key = $_SESSION['tripal_pub_PMID_query'][$terms]['QueryKey'];
@@ -121,7 +124,11 @@ function tripal_pub_PMID_search_init($terms, $retmax, $days = 0){
   // do a search for a single result so that we can establish a history, and get
   // the number of records. Once we have the number of records we can retrieve
   // those requested in the range.
-  $query_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=Pubmed&retmax=$retmax&usehistory=y&term=$terms";
+  $query_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?" .
+    "db=Pubmed" .
+    "&retmax=$retmax" .
+    "&usehistory=y".
+    "&term=$terms";
   if($days) {
     $query_url .= "&reldate=$days&datetype=edat";
   }
@@ -181,8 +188,15 @@ function tripal_pub_PMID_fetch($query_key, $web_env, $rettype = 'null',
 
   // repeat the search performed previously (using WebEnv & QueryKey) to retrieve
   // the PMIDs within the range specified.  The PMIDs will be returned as a text list
-  $fetch_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?rettype=$rettype&retmode=$retmod&retstart=$start&retmax=$limit&db=Pubmed&query_key=$query_key&WebEnv=$web_env";
-  //dpm($fetch_url);
+  $fetch_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?".
+    "rettype=$rettype" .
+    "&retmode=$retmod" . 
+    "&retstart=$start" .
+    "&retmax=$limit" .
+    "&db=Pubmed" .
+    "&query_key=$query_key".
+    "&WebEnv=$web_env";
+  
   foreach ($args as $key => $value) {
     if(is_array($value)) {
       $fetch_url .= "&$key=";
@@ -195,10 +209,10 @@ function tripal_pub_PMID_fetch($query_key, $web_env, $rettype = 'null',
       $fetch_url .= "&$key=$value";
     }
   }
-  
+  //dpm($fetch_url);
   $rfh = fopen($fetch_url, "r");
   if (!$rfh) {
-    drupal_set_message('Could not perform Pubmed query. Cannot connect to Entrez.', 'error');
+    drupal_set_message('ERROR: Could not perform PubMed query.', 'error');
     return '';    
   }
   $results = '';
@@ -321,7 +335,7 @@ function tripal_pub_PMID_parse_pubxml($pub_xml) {
     $pub['Citation'] .= ':' . $pub['Pages'];
   }
   
-  //$pub['xml'] = $pub_xml;
+  $pub['xml'] = $pub_xml;
   return $pub;
 }
 

+ 7 - 0
tripal_pub/tripal_pub.module

@@ -127,6 +127,13 @@ function tripal_pub_menu() {
     'type ' => MENU_CALLBACK,
   );
   
+  $items['admin/tripal/tripal_pub/import/raw/%'] = array(
+    'page callback' => 'tripal_pub_get_raw_data',
+    'page arguments' => array(5),
+    'access arguments' => array('administer tripal pubs'),
+    'type ' => MENU_CALLBACK,
+  );
+  
   $items['admin/tripal/tripal_pub/import/edit/%'] = array(    
     'page callback' => 'tripal_pub_importer_setup',
     'page arguments' => array(4, 5),