فهرست منبع

Fixes to importers, citation creation, and publication OBO

spficklin 11 سال پیش
والد
کامیت
7abae40bc7

+ 85 - 30
tripal_pub/api/tripal_pub.api.inc

@@ -1018,30 +1018,10 @@ function tripal_pub_get_publication_array($pub_id, $skip_existing = TRUE) {
   );
   $ptypes = tripal_core_generate_chado_var('pubprop', $values); 
   $ptypes = tripal_core_expand_chado_vars($ptypes, 'field', 'pubprop.value', $options);
-  $found_type = 0;
   foreach ($ptypes as $ptype) {
-    if($ptype->value == 'Journal Article') { 
-      $pub['Publication Type'] = 'Journal Article';   
-      $found_type = 1;  
-    }
-    if($ptype->value == 'Book') {
-    	$pub['Publication Type'] = 'Book';
-      $found_type = 1;
-    }
-    if($ptype->value == 'Book Chapter') {
-    	$pub['Publication Type'] = 'Book Chapter';
-      $found_type = 1;
-    }    
-    if($ptype->value == 'Conference Proceedings') {
-    	$pub['Publication Type'] = 'Conference Proceedings';
-      $found_type = 1;
-    }
+  	$pub['Publication Type'][] = $ptype->value;
   }
-  if (!$found_type) {
-    watchdog('tripal_pub', "Publication type is not yet handled for creating citations: %pub_id", 
-      array('%pub_id' => $pub_id), WATCHDOG_ERROR);
-    return FALSE;
-  }  
+ 
 
   // ---------------------------------
   // get the authors list
@@ -1080,18 +1060,54 @@ function tripal_pub_get_publication_array($pub_id, $skip_existing = TRUE) {
  * @param $pub
  *   An array structure containing publication details where the keys
  *   are the publication ontology term names and values are the 
- *   corresponding details.
- * @param $type
- *   The type of publication. Supported types include
- *   'Journal Article', 'Book', 'Book Chapter', 
- *   'Conference Proceedings',
+ *   corresponding details.  The pub array can contain the following
+ *   keys with corresponding values:   
+ *     - Publication Type:  an array of publication types; a publication can have more than one type
+ *     - Authors: a  string containing all of the authors of a publication
+ *     - Journal Name:  a string containing the journal name
+ *     - Journal Abbreviation: a string containing the journal name abbreviation
+ *     - Series Name: a string containing the series (e.g. conference proceedings) name
+ *     - Series Abbreviation: a string containing the series name abbreviation     
+ *     - Volume: the series volume number
+ *     - Issue: the series issue number
+ *     - Pages: the page numbers for the publication
+ *     - Publication Date:  A date in the format "Year Month Day"
+ *      
  * @return
  *   A text string containing the citation
  */
-function tripal_pub_create_citation($pub, $type = 'Journal Article') {
+function tripal_pub_create_citation($pub) {
 	$citation = '';
+	$pub_type = '';
+	
+	// An article may have more than one publication type. For example,
+	// a publication type can be 'Journal Article' but also a 'Clinical Trial'.
+	// Therefore, we need to select the type that makes most sense for 
+	// construction of the citation. Here we'll iterate through them all
+	// and select the one that matches best.
+	if(is_array($pub['Publication Type'])) {		
+	  foreach ($pub['Publication Type'] as $ptype) {
+	    if ($ptype == 'Journal Article' or
+	        $ptype == 'Book' or
+	        $ptype == 'Book Chapter' or
+	        $ptype == 'Conference Proceedings'){ 
+	      $pub_type = $ptype;   
+	    }
+	  }
+	  if (!$pub_type) {
+      watchdog('tripal_pub', "Cannot generate citation for publication type: %types", 
+        array('%types' => print_r($pub['Publication Type'], TRUE)), WATCHDOG_ERROR);
+      return FALSE;
+    }
+	}
+	else {
+	  $pub_type = $pub['Publication Type'];
+	}	
 	
-	if ($type == 'Journal Article') {
+	//----------------------
+  // Journal Article
+  //----------------------
+	if ($pub_type == 'Journal Article') {
 	  $citation = $pub['Authors'] . '. ' . $pub['Title'] .  '. ';
 	
 	  if ($pub['Journal Name']) {
@@ -1118,9 +1134,48 @@ function tripal_pub_create_citation($pub, $type = 'Journal Article') {
 	  }
 	  $citation .= '.';
 	}
-	if ($type == 'Book') {
+	//----------------------
+  // Book
+  //----------------------
+	elseif ($pub_type == 'Book') {
 	
 	}
+	//----------------------
+  // Book Chapter
+  //----------------------
+  elseif ($pub_type == 'Book Chapter') {
+  	
+  }
+	//----------------------
+	// Conference Proceedings
+  //----------------------
+  elseif ($pub_type == 'Conference Proceedings') {
+    $citation = $pub['Authors'] . '. ' . $pub['Title'] .  '. ';
+  
+    if ($pub['Series Name']) {
+      $citation .= $pub['Series Name'] . '. ';
+    }
+    elseif ($pub['Series Abbreviation']) {
+      $citation .= $pub['Series Abbreviation'] . '. ';
+    }
+    $citation .= $pub['Publication Date'];
+    if ($pub['Volume'] or $pub['Issue'] or $pub['Pages']) {
+      $citation .= '; ';
+    }
+    if ($pub['Volume']) {
+      $citation .= $pub['Volume'];
+    }
+    if ($pub['Issue']) {
+      $citation .= '(' . $pub['Issue'] . ')';
+    }
+    if ($pub['Pages']) {
+      if($pub['Volume']) {
+        $citation .= ':';
+      }
+      $citation .= $pub['Pages'];
+    }
+    $citation .= '.';
+  }
 
   return $citation;
 }

+ 14 - 1
tripal_pub/files/tpub.obo

@@ -112,8 +112,9 @@ relationship: part_of TPUB:0000002 ! Publication
 [Term]
 id: TPUB:0000038
 name: Journal Abbreviation
-relationship: part_of TPUB:0000037 ! Publication Details
 def: The journal title ISO Abbreviation.
+is_a: TPUB:0000257 ! Series Abbreviation
+relationship: part_of TPUB:0000037 ! Publication Details
 
 [Term]
 id: TPUB:0000039
@@ -272,6 +273,7 @@ is_a: TPUB:0000070 ! Conference Proceedings
 [Term]
 id: TPUB:0000075
 name: Journal Name
+is_a: TPUB:0000256 ! Series Name
 relationship: part_of TPUB:0000037 ! Publication Details
 
 [Term]
@@ -1410,6 +1412,17 @@ name: Patent Number
 relationship: part_of TPUB:0000037 ! Publication Details
 def: A unique special code for journal article, book, book chapter or any publication type.
 
+[Term]
+id: TPUB:0000256
+name: Series Name
+relationship: part_of TPUB:0000037 ! Publication Details
+def: The name of the medium that produces a series of publications (e.g. journal, conference proceedings, etc.)
+
+[Term]
+id: TPUB:0000257
+name: Series Abbreviation
+relationship: part_of TPUB:0000037 ! Publication Details
+def: An abbreviation for the series name.  (e.g. the journal title ISO Abbreviation).
 
 [Typedef]
 id: is_a

+ 17 - 2
tripal_pub/includes/importers/AGL.inc

@@ -436,8 +436,10 @@ function tripal_pub_AGL_range($search_array, $start = 0, $limit = 10) {
 function tripal_pub_AGL_parse_pubxml($pub_xml) {
   $pub = array();
 
-  // publications from the NAL Article Citation Database are all journal articles
-  $pub['Publication Type'][] = 'Journal Article';
+  // we will set the default publication type as a journal article. The NAL
+  // dataset doesn't specify an article type so we'll have to glean the type
+  // from other information (e.g. series name has 'Proceedings' in it)
+  $pub['Publication Type'][0] = 'Journal Article';
 
   if (!$pub_xml) {
     return $pub;
@@ -693,7 +695,20 @@ function tripal_pub_AGL_parse_pubxml($pub_xml) {
           $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
           foreach ($codes as $code => $value) {
             switch ($code) {
+            	case 'a':
+            		if (preg_match('/Proceedings/i', $value)) {
+            		  $pub['Series Name'] = preg_replace('/\.$/', '', $value);
+            		  $pub['Publication Type'][0] = 'Conference Proceedings';
+            		}
+            		else {
+            			$pub['Journal Name'] = preg_replace('/\.$/', '', $value);
+            		}
+                break;
               case 't':
+                if (preg_match('/Proceedings/i', $value)) {
+                  $pub['Series Name'] = preg_replace('/\.$/', '', $value);
+                  $pub['Publication Type'][0] = 'Conference Proceedings';
+                }
                 $pub['Journal Name'] = preg_replace('/\.$/', '', $value);
                 break;
               case 'g':

+ 30 - 12
tripal_pub/includes/importers/PMID.inc

@@ -56,30 +56,47 @@ function tripal_pub_remote_search_PMID($search_array, $num_to_retrieve, $pager_i
 
     // if this is phrase make sure the search terms are surrounded by quotes
     if ($is_phrase) {
-      $search_terms = "\"$search_terms\"";
+      $search_str .= "(\"$search_terms\" |SCOPE|) ";
+    }
+    // if this is not a phrase then we want to separate each 'OR' or 'AND' into a unique criterion
+    else {
+    	$search_str .= "(";
+    	if (preg_match('/and/i', $search_terms)) {
+    	  $elements = preg_split('/\s+and+\s/i', $search_terms);
+    	  foreach ($elements as $element) {
+          $search_str .= "($element |SCOPE|) AND ";
+    	  }
+    	  $search_str = substr($search_str, 0, -5); // remove trailing 'AND '              
+    	} 
+      elseif (preg_match('/or/i', $search_terms)) {
+        $elements = preg_split('/\s+or+\s/i', $search_terms);
+        foreach ($elements as $element) {
+          $search_str .= "($element |SCOPE|) OR ";
+        }
+        $search_str = substr($search_str, 0, -4); // remove trailing 'OR '        
+      }
+      else {
+      	$search_str .= "($search_terms |SCOPE|) ";
+      }
+      $search_str .= ') ';
     }
-    $search_str .= '(';
 
     if ($scope == 'title') {
-      $search_str .= $search_terms . '[Title]';
+    	$search_str = preg_replace('/\|SCOPE\|/', '[Title]', $search_str);
     }
     elseif ($scope == 'author') {
-      $search_str .= $search_terms . '[Author]';
+    	$search_str = preg_replace('/\|SCOPE\|/', '[Author]', $search_str);
     }
     elseif ($scope == 'abstract') {
-      $search_str .= $search_terms . '[Title/Abstract]';
+    	$search_str = preg_replace('/\|SCOPE\|/', '[Title/Abstract]', $search_str);
     }
     elseif ($scope == 'journal') {
-      $search_str .= $search_terms . '[Journal]';
+      $search_str = preg_replace('/\|SCOPE\|/', '[Journal]', $search_str);
     }
     elseif ($scope == 'id') {
-      $search_terms = preg_replace('/PMID:([^\s]*)/', '$1', $search_terms);
-      $search_str .=  $search_terms . '[Uid]';
-    }
-    else {
-      $search_str .= $search_terms;
+      $search_str = preg_replace('/PMID:([^\s]*)/', '$1', $search_str);
+      $search_str = preg_replace('/\|SCOPE\|/', '[Uid]', $search_str);
     }
-    $search_str .= ')';
   }
   if ($days) {
     // get the date of the day suggested
@@ -91,6 +108,7 @@ function tripal_pub_remote_search_PMID($search_array, $num_to_retrieve, $pager_i
   $search_array['limit'] = $num_to_retrieve;
   $search_array['search_string'] = $search_str;
 
+  dpm($search_str);
   unset($_SESSION['tripal_pub_PMID_query']);
   // we want to get the list of pubs using the search terms but using a Drupal style pager
   $pubs = tripal_pager_callback('tripal_pub_PMID_range',  $num_to_retrieve, $pager_id,

+ 11 - 1
tripal_pub/includes/pub_importers.inc

@@ -309,6 +309,7 @@ function tripal_pub_importer_setup_form(&$form_state = NULL, $pub_import_id = NU
     'NOT' => 'NOT'
     );
     for($i = 1; $i <= $num_criteria; $i++) {
+    	$is_phrase = 1;
 
       // if we have criteria supplied from the database then use that as the initial defaults
       if ($criteria) {
@@ -322,6 +323,7 @@ function tripal_pub_importer_setup_form(&$form_state = NULL, $pub_import_id = NU
       $scope        = isset($_SESSION['tripal_pub_import']['criteria'][$i]['scope'])        ? $_SESSION['tripal_pub_import']['criteria'][$i]['scope']        : $scope;
       $is_phrase    = isset($_SESSION['tripal_pub_import']['criteria'][$i]['is_phrase'])    ? $_SESSION['tripal_pub_import']['criteria'][$i]['is_phrase']    : $is_phrase;
       $operation    = isset($_SESSION['tripal_pub_import']['criteria'][$i]['operation'])    ? $_SESSION['tripal_pub_import']['criteria'][$i]['operation']    : $operation;
+      
       // If the form_state has variables then use those.  This happens when an error occurs on the form or the
       // form is resubmitted using AJAX
       if ($form_state['values']) {
@@ -335,7 +337,7 @@ function tripal_pub_importer_setup_form(&$form_state = NULL, $pub_import_id = NU
       '#type'          => 'textfield',
       '#description'   => t('Please provide a list of words for searching. You may use 
         conjunctions such as "AND" or "OR" to separate words if they are expected in 
-        the same scope. Uncheck the "Is Phrase" checkbox to use conjunctions'),
+        the same scope, but do not mix ANDs and ORs.  Uncheck the "Is Phrase" checkbox to use conjunctions'),
       '#default_value' => $search_terms,
       '#required'      => TRUE,
       );
@@ -437,10 +439,18 @@ function tripal_pub_importer_setup_form_validate($form, &$form_state) {
     $scope =  $form_state['values']["scope-$i"];
     $is_phrase =  $form_state['values']["is_phrase-$i"];
     $operation =  $form_state['values']["operation-$i"];
+    
+    if (!$is_phrase) {
+    	if (preg_match('/and/i', $search_terms) and preg_match('/or/i', $search_terms)) {
+    	  form_set_error("search_terms-$i", "You may use 'AND' or 'OR' but cannot use both. Add a new entry below with the same scope for the other conunction.");
+    	  $_SESSION['tripal_pub_import']['perform_search'] = 0;	
+    	}
+    }
   }
 
   if ($days and !is_numeric($days) or preg_match('/\./', $days)) {
     form_set_error("days", "Please enter a numeric, non decimal value, for the number of days.");
+    $_SESSION['tripal_pub_import']['perform_search'] = 0;
   }
   // allow the selected remote database to validate any changes to the form if needed
   $callback = "tripal_pub_remote_validate_form_$remote_db";