Browse Source

Fixes to API, bug fixes, etc.

Stephen Ficklin 10 years ago
parent
commit
fda4232253

+ 0 - 1
tripal_contact/includes/tripal_contact.chado_node.inc

@@ -154,7 +154,6 @@ function chado_contact_form(&$node, $form_state) {
   }
   else {
     $contact_types = tripal_get_cvterm_default_select_options('contact', 'type_id', 'contact types');
-    $contact_types[0] = 'Select a Type';
   }
   $form['type_id'] = array(
     '#type' => 'select',

+ 0 - 58
tripal_core/api/tripal_core.chado_variables.api.inc

@@ -4,64 +4,6 @@
  * This API generates objects containing the full details of a record(s) in chado.
  */
 
-/**
- * Implements hook_exclude_type_by_default()
- *
- * This hooks allows fields of a specified type that match a specified criteria to be excluded by
- * default from any table when chado_generate_var() is called. Keep in mind that if
- * fields are excluded by default they can always be expanded at a later date using
- * chado_expand_var().
- *
- * Criteria are php strings that evaluate to either TRUE or FALSE. These strings are evaluated using
- * drupal_eval() which suppresses syntax errors and throws watchdog entries of type php. There are
- * also watchdog entries of type tripal_core stating the exact criteria evaluated. Criteria can
- * contain the following tokens:
- *   - <field_name>
- *       Replaced by the name of the field to be excluded
- *   - <field_value>
- *       Replaced by the value of the field in the current record
- * Also keep in mind that if your criteria doesn't contain the &gt;field_value&lt;  token then it will be
- * evaluated before the query is executed and if the field is excluded it won't be included in the
- * query.
- *
- * @return
- *   An array of type => criteria where the type is excluded if the criteria evaluates to TRUE
- *
- * @ingroup tripal_chado_query_api
- */
-function tripal_core_exclude_type_by_default() {
-  return array('text' => 'strlen("<field_value> ") > 250');
-}
-
-/**
- * Implements hook_exclude_field_from_<tablename>_by_default()
- *
- * This hooks allows fields from a specified table that match a specified criteria to be excluded by
- * default from any table when chado_generate_var() is called. Keep in mind that if
- * fields are excluded by default they can always be expanded at a later date using
- * chado_expand_var().
- *
- * Criteria are php strings that evaluate to either TRUE or FALSE. These strings are evaluated using
- * drupal_eval() which suppresses syntax errors and throws watchdog entries of type php. There are
- * also watchdog entries of type tripal_core stating the exact criteria evaluated. Criteria can
- * contain the following tokens:
- *   - <field_name>
- *       Replaced by the name of the field to be excluded
- *   - <field_value>
- *       Replaced by the value of the field in the current record
- * Also keep in mind that if your criteria doesn't contain the <field_value>  token then it will be
- * evaluated before the query is executed and if the field is excluded it won't be included in the
- * query.
- *
- * @return
- *   An array of field => criteria where the type is excluded if the criteria evaluates to TRUE
- *
- * @ingroup tripal_chado_query_api
- */
-function tripal_core_exclude_field_from_feature_by_default() {
-  return array('residues' => 'TRUE');
-}
-
 /**
  * Generates an array containing the full details of a record(s) in chado. The
  * returned array differs from the array returned by chado_select_record as all foreign key

+ 29 - 0
tripal_core/tripal_core.module

@@ -601,4 +601,33 @@ function tripal_core_node_view($node, $view_mode, $langcode) {
       }
     }
   }
+}
+
+/**
+ * Implements hook_exclude_type_by_default()
+ *
+ * This hook allows fields of a specified type that match specified criteria to be excluded by
+ * default from any table when chado_generate_var() is called. Keep in mind that if
+ * fields are excluded by default they can always be expanded at a later date using
+ * chado_expand_var().
+ *
+ * Criteria are php strings that evaluate to either TRUE or FALSE. These strings are evaluated using
+ * drupal_eval() which suppresses syntax errors and throws watchdog entries of type php. There are
+ * also watchdog entries of type tripal_core stating the exact criteria evaluated. Criteria can
+ * contain the following tokens:
+ *   - <field_name>
+ *       Replaced by the name of the field to be excluded
+ *   - <field_value>
+ *       Replaced by the value of the field in the current record
+ * Also keep in mind that if your criteria doesn't contain the <field_value> token then it will be
+ * evaluated before the query is executed and if the field is excluded it won't be included in the
+ * query.
+ *
+ * @return
+ *   An array of type => criteria where the type is excluded if the criteria evaluates to TRUE
+ *
+ * @ingroup tripal_core
+ */
+function tripal_core_exclude_type_by_default() {
+  return array('text' => 'strlen("<field_value> ") > 250');
 }

+ 18 - 19
tripal_cv/api/tripal_cv.api.inc

@@ -272,26 +272,25 @@ function tripal_get_cvterm($identifiers, $options = array()) {
  *   which provides a list of all chado cvterms
  *
  * @param $cv_id
- *   The chado cv_id;
- *   only cvterms with the supplied cv_id will be returned
+ *   The chado cv_id; only cvterms with the supplied cv_id will be returned
+ *   
  * @return
- *   An array(cvterm_id => name)
- *   for each cvterm in the chado cvterm table where cv_id=that supplied
+ *   An associative array with the cvterm_id's as keys. The first
+ *   element in the array has a key of '0' and a value of 'Select a Type'
  *
  * @ingroup tripal_cv_api
  */
-function tripal_get_cvterm_select_options($cv_id = 0) {
+function tripal_get_cvterm_select_options($cv_id) {
+  $columns = array('cvterm_id', 'name');
+  $values = array('cv_id' => $cv_id);
+  $s_options = array('order_by' => array('name' => 'ASC'));
 
-  if ($cv_id > 0) {
-    $results = chado_select_record('cvterm', array('cvterm_id', 'name'), array('cv_id' => $cv_id));
-  }
-  else {
-    $results = chado_select_record('cvterm', array('cvterm_id', 'name'), array());
-  }
+  $cvterms = chado_select_record('cvterm', $columns, $values, $s_options); 
 
   $options = array();
-  foreach ($results as $r) {
-    $options[$r->cvterm_id] = $r->name;
+  $options[0] = 'Select a Type';
+  foreach ($cvterms as $cvterm) {
+    $options[$cvterm->cvterm_id] = $cvterm->name;
   }
 
   return $options;
@@ -312,12 +311,12 @@ function tripal_get_cvterm_select_options($cv_id = 0) {
  *
  * @ingroup tripal_cv_api
  */
-function tripal_update_cvtermpath($cvid, $job_id = NULL) {
+function tripal_update_cvtermpath($cv_id, $job_id = NULL) {
   // TODO: need better error checking in this function
 
   // first get the controlled vocabulary name:
   $sql = "SELECT * FROM {cv} WHERE cv_id = :cv_id";
-  $cv = chado_query($sql, array(':cv_id' => $cvid))->fetchObject();
+  $cv = chado_query($sql, array(':cv_id' => $cv_id))->fetchObject();
 
   print "\nUpdating cvtermpath for $cv->name...\n";
 
@@ -817,12 +816,12 @@ function tripal_submit_obo_job($obo) {
  * Add the obo to the tripal_cv_obo table in the Drupal database
  *
  * @param $name
- * The human readable name of this ontology
+ *   The human readable name of this ontology
  * @param $path
- * The file path or URL of the ontology
+ *   The file path or URL of the ontology
  *
  * @return
- * Returns the ontology ID
+ *   Returns the ontology ID
  *
  * @ingroup tripal_cv_api
  */
@@ -883,7 +882,7 @@ function tripal_autocomplete_cvterm($cv_id, $string = '') {
  *
  * @ingroup tripal_db_api
  */
-function tripal_associate_cvterm($basetable, $record_id, $cvterm) {
+function tripal_associate_cvterm($basetable, $record_id, $cvterm, $options = array()) {
   $linking_table = $basetable . '_cvterm';
   $foreignkey_name = $basetable . '_id';
 

+ 3 - 3
tripal_db/api/tripal_db.api.inc

@@ -137,13 +137,13 @@ function tripal_get_db($identifiers, $options = array()) {
  */
 function tripal_get_db_select_options() {
 
-  $result = chado_query("SELECT db_id, name FROM {db} ORDER BY name");
+  $dbs = chado_query("SELECT db_id, name FROM {db} ORDER BY name");
 
   $options = array();
   $options[] = 'Select a Database';
   
-  foreach ($result as $r) {
-    $options[$r->db_id] = $r->name;
+  foreach ($dbs as $db) {
+    $options[$db->db_id] = $db->name;
   }
 
   return $options;

+ 29 - 0
tripal_feature/tripal_feature.module

@@ -618,3 +618,32 @@ function tripal_feature_form_alter(&$form, &$form_state, $form_id) {
     unset($form['body']);
   }
 }
+
+/**
+ * Implements hook_exclude_field_from_<tablename>_by_default()
+ *
+ * This hook allows fields from a specified table that match specified criteria to be excluded by
+ * default from any table when chado_generate_var() is called. Keep in mind that if
+ * fields are excluded by default they can always be expanded at a later date using
+ * chado_expand_var().
+ *
+ * Criteria are php strings that evaluate to either TRUE or FALSE. These strings are evaluated using
+ * drupal_eval() which suppresses syntax errors and throws watchdog entries of type php. There are
+ * also watchdog entries of type tripal_core stating the exact criteria evaluated. Criteria can
+ * contain the following tokens:
+ *   - <field_name>
+ *       Replaced by the name of the field to be excluded
+ *   - <field_value>
+ *       Replaced by the value of the field in the current record
+ * Also keep in mind that if your criteria doesn't contain the <field_value>  token then it will be
+ * evaluated before the query is executed and if the field is excluded it won't be included in the
+ * query.
+ *
+ * @return
+ *   An array of field => criteria where the field is excluded if the criteria evaluates to TRUE
+ *
+ * @ingroup tripal_feature
+ */
+function tripal_feature_exclude_field_from_feature_by_default() {
+  return array('residues' => 'TRUE');
+}

+ 12 - 14
tripal_organism/api/tripal_organism.api.inc

@@ -135,17 +135,17 @@ function tripal_get_organism($identifiers, $options = array()) {
  * @ingroup tripal_organism_api
  */
 function tripal_get_organism_select_options($syncd_only = TRUE) {
-
+  $org_list = array();
+  $org_list[] = 'Select an organism';
+  
   if ($syncd_only) {
-    // use this SQL for getting synced organisms
-    $dsql =  "SELECT * FROM {chado_organism}";
-    $orgs = db_query($dsql);
-
-    // use this SQL statement for getting the organisms
-    $csql =  "SELECT * FROM {organism} " .
-             "WHERE organism_id = :organism_id";
-
-    $org_list = array();
+    $sql = "
+      SELECT * 
+      FROM public.chado_organism CO
+        INNER JOIN {organism} O ON O.organism_id = CO.organism_id
+      ORDER BY O.genus, O.species
+    ";
+    $orgs = chado_query($sql);
 
     // iterate through the organisms and build an array of those that are synced
     foreach ($orgs as $org) {
@@ -156,10 +156,8 @@ function tripal_get_organism_select_options($syncd_only = TRUE) {
   }
   else {
     // use this SQL statement for getting the organisms
-    $csql =  "SELECT * FROM {organism}";
-    $orgs = chado_query($csql)->execute();
-
-    $org_list = array();
+    $csql =  "SELECT * FROM {organism} ORDER BY genus, species";
+    $orgs = chado_query($csql);
 
     // iterate through the organisms and build an array of those that are synced
     foreach ($orgs as $org) {

+ 2 - 596
tripal_pub/api/tripal_pub.api.inc

@@ -15,255 +15,6 @@
  * @}
  */
 
-/**
- * Retrieves a list of publications as an associated array where
- *  keys correspond directly with Tripal Pub CV terms.
- *
- * @param remote_db
- *    The name of the remote publication database to query. These names should
- *    match the name of the databases in the Chado 'db' table. Currently
- *    supported databass include
- *      'PMID':  PubMed
- *
- * @param search_array
- *    An associate array containing the search criteria. The following key
- *    are expected
- *      'remote_db':     Specifies the name of the remote publication database
- *      'num_criteria':  Specifies the number of criteria present in the search array
- *      'days':          The number of days to include in the search starting from today
- *      'criteria':      An associate array containing the search critiera. There should
- *                       be no less than 'num_criteria' elements in this array.
- *
- *    The following keys are expected in the 'criteria' array
- *      'search_terms':  A list of terms to search on, separated by spaces.
- *      'scope':         The fields to search in the remote database. Valid values
- *                       include: 'title', 'abstract', 'author' and 'any'
- *      'operation':     The logical operation to use for this criteria. Valid
- *                       values include: 'AND', 'OR' and 'NOT'.
- * @param $num_to_retrieve
- *    The number of records to retrieve.  In cases with large numbers of
- *    records to retrieve, the remote database may limit the size of each
- *    retrieval.
- * @param $page
- *    Optional.  If this function is called where the
- *    page for the pager cannot be set using the $_GET variable, use this
- *    argument to specify the page to retrieve.
- *
- * @return
- *   Returns an array of pubs where each element is
- *   an associative array where the keys are Tripal Pub CV terms.
- *
- * @ingroup tripal_pub_api
- */
-function tripal_get_remote_pubs($remote_db, $search_array, $num_to_retrieve, $page = 0) {
-
-  // now call the callback function to get the results
-  $callback = "tripal_pub_remote_search_$remote_db";
-  $pubs =  array(
-    'total_records' => 0,
-    'search_str'    => '',
-    'pubs'          => array(),
-  );
-  if (function_exists($callback)) {
-    $pubs = call_user_func($callback, $search_array, $num_to_retrieve, $page);
-  }
-  return $pubs;
-}
-
-/**
- * This function is used to perfom a query using one of the supported databases
- * and return the raw query results. This may be XML or some other format
- * as provided by the database.
- *
- * @param $dbxref
- *   The unique database ID for the record to retrieve.  This value must
- *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
- *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
- *   for the record in the database.
- *
- * @return
- *   Returns the publication array or FALSE if a problem occurs
- *
- * @ingroup tripal_pub_api
- */
-function tripal_get_remote_pub($dbxref) {
-
-  if(preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
-    $remote_db = $matches[1];
-    $accession = $matches[2];
-
-    // check that the database is supported
-    $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
-    if(!in_array($remote_db, $supported_dbs)) {
-      return FALSE;
-    }
-
-    $search = array(
-      'num_criteria' => 1,
-      'remote_db' => $remote_db,
-      'criteria' => array(
-        '1' => array(
-          'search_terms' => "$remote_db:$accession",
-          'scope' => 'id',
-          'operation' => '',
-          'is_phrase' => 0,
-        ),
-      ),
-    );
-    $pubs = tripal_get_remote_pubs($remote_db, $search, 1, 0);
-
-    return $pubs['pubs'][0];
-  }
-  return FALSE;
-}
-
-/**
- * Builds the SQL statement need to search Chado for the publications
- * that match the user supplied criteria.  Tpyically, this function is
- * called by the search form generated by the tripal_pub_search_form() function
- * but this function is included in the API for calling by anyone.
- *
- * @param $search_array
- *   An array of search criteria provided by the user. The search array is
- *   an associative array with the following keys:
- *     'num_criteria': an integer indicating the number of search criteria supplied
- *     'from_year':    filters records by a start year
- *     'to_year':      filters records by an end year
- *     'criteria':     an array of criteria. Each criteria is an associative
- *                     array with the following keys:
- *                     'search_terms':   The text used for searching
- *                     'scope':          The cvterm_id of the property used for filtering
- *                     'mode':           The operation (e.g. AND, OR or NOT)
- * @param $offset
- *   The offset for paging records.  The first record returned will be
- *   at the offset indicated here, and the next $limit number of records
- *   will be returned.
- *
- * @param $limit
- *   The number of records to retrieve
- *
- * @param total_records
- *   A value passed by reference. This value will get set to the total
- *   number of matching records
- *
- * @return
- *   a PDO database object of the query results.
- *
- * @ingroup tripal_pub
- */
-function tripal_search_publications($search_array, $offset, $limit, &$total_records) {
-
-  // build the SQL based on the criteria provided by the user
-  $select = "SELECT DISTINCT P.*, CP.nid ";
-  $from   = "FROM {pub} P
-               LEFT JOIN public.chado_pub CP on P.pub_id = CP.pub_id
-               INNER JOIN {cvterm} CVT on CVT.cvterm_id = P.type_id
-            ";
-  $where  = "WHERE (NOT P.title = 'null') "; // always exclude the dummy pub
-  $order  = "ORDER BY P.pyear DESC, P.title ASC";
-  $args = array();  // arguments for where clause
-  $join = 0;
-
-  $num_criteria = $search_array['num_criteria'];
-  $from_year    = $search_array['from_year'];
-  $to_year      = $search_array['to_year'];
-
-  for ($i = 1; $i <= $num_criteria; $i++) {
-    $value = $search_array['criteria'][$i]['search_terms'];
-    $type_id = $search_array['criteria'][$i]['scope'];
-    $mode = $search_array['criteria'][$i]['mode'];
-    $op = $search_array['criteria'][$i]['operation'];
-
-    // skip criteria with no values
-    if(!$value) {
-      continue;
-    }
-
-    // to prevent SQL injection make sure our operator is
-    // what we expect
-    if ($op and $op != "AND" and $op != "OR" and $op != 'NOT') {
-      $op = 'AND';
-    }
-    if ($op == 'NOT') {
-      $op = 'AND NOT';
-    }
-    if (!$op) {
-      $op = 'AND';
-    }
-
-    // get the scope type
-    $values = array('cvterm_id' => $type_id);
-    $cvterm = chado_select_record('cvterm', array('name'), $values);
-    $type_name = '';
-    if (count($cvterm) > 0) {
-      $type_name = $cvterm[0]->name;
-    }
-    if ($type_name == 'Title') {
-      $where .= " $op (lower(P.title) LIKE lower(:crit$i)) ";
-      $args[":crit$i"] = '%' . $value . '%';
-    }
-    elseif ($type_name == 'Year') {
-      $where .= " $op (lower(P.pyear) = lower(:crit$i)) ";
-      $args[":crit$i"] = '%' . $value . '%';
-    }
-    elseif ($type_name == 'Volume') {
-      $where .= " $op (lower(P.volume) = lower(:crit$i)) ";
-      $args[":crit$i"] = '%' . $value . '%';
-    }
-    elseif ($type_name == 'Issue') {
-      $where .= " $op (lower(P.issue) = lower(:crit$i)) ";
-      $args[":crit$i"] = '%' . $value . '%';
-    }
-    elseif ($type_name == 'Journal Name') {
-      $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id AND PP$i.type_id = :crit$i ";
-      $where .= " $op ((lower(P.series_name) = lower(:crit$i) and CVT.name = 'Journal Article') OR
-      (lower(PP$i.value) = lower(:crit$i))) ";
-      $args[":crit$i"] = $type_id;
-    }
-    elseif ($type_name == 'Conference Name') {
-      $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id AND PP$i.type_id = :crit$i ";
-      $where .= " $op ((lower(P.series_name) = lower(:crit$i) and CVT.name = 'Conference Proceedings') OR
-      (lower(PP$i.value) = lower(:crit$i))) ";
-      $args[":crit$i"] = $type_id;
-    }
-    elseif ($type_name == 'Publication Type') {
-      $where .= " $op (lower(CVT.name) = lower(:crit$i))";
-      $args[":crit$i"] = $value;
-    }
-    elseif ($type_id == 0) { //'Any Field'
-      $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id ";
-      $where .= " $op (lower(PP$i.value)  LIKE lower(:crit$i) OR
-      lower(P.title) LIKE lower(:crit$i) OR
-      lower(P.volumetitle) LIKE lower(:crit$i) OR
-      lower(P.publisher) LIKE lower(:crit$i) OR
-      lower(P.uniquename) LIKE lower(:crit$i) OR
-      lower(P.pubplace) LIKE lower(:crit$i) OR
-      lower(P.miniref) LIKE lower(:crit$i) OR
-      lower(P.series_name) LIKE lower(:crit$i)) ";
-      $args[":crit$i"] = '%' . $value . '%';
-    }
-    // for all other properties
-    else {
-      $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id AND PP$i.type_id = :type_id$i ";
-      $where .= " $op (lower(PP$i.value) LIKE lower(:crit$i)) ";
-      $args[":crit$i"] = '%' . $value . '%';
-      $args[":type_id$i"] = $type_id;
-    }
-  }
-  if($from_year and $to_year) {
-    $where .= " AND (P.pyear ~ '....' AND to_number(P.pyear,'9999') >= :from$i AND to_number(P.pyear,'9999') <= :to$i) ";
-    $args[":from$i"] = $from_year;
-    $args[":to$i"] = $to_year;
-  }
-  $sql = "$select $from $where $order  LIMIT " . (int) $limit . ' OFFSET ' . (int) $offset;
-  $count = "SELECT count(*) FROM ($select $from $where $order) as t1";
-
-  // first get the total number of matches
-  $total_records = chado_query($count, $args)->fetchField();
-  $results = chado_query($sql, $args);
-
-  return $results;
-}
 
 /**
  * Retrieves a chado publication array
@@ -421,10 +172,11 @@ function tripal_get_publication($identifiers, $options = array()) {
  * @return
  *   An array containing the pub_id's of matching publications. Returns an
  *   empty array if no pubs match
+ *
+ * @ingroup tripal_pub_api
  */
 function tripal_publication_exists($pub_details) {
 
-
   // first try to find the publication using the accession number if that key exists in the details array
   if (array_key_exists('Publication Dbxref', $pub_details)) {
     $pub = tripal_get_publication(array('dbxref' => $pub_details['Publication Dbxref']));
@@ -519,349 +271,3 @@ function tripal_publication_exists($pub_details) {
 
   return $return;
 }
-
-/**
- * Updates publication records that currently exist in the Chado pub table
- * with the most recent data in the remote database.
- *
- * @param $do_contact
- *   Set to TRUE if authors should automatically have a contact record added
- *   to Chado. Contacts are added using the name provided by the remote
- *   database.
- * @param $dbxref
- *   The unique database ID for the record to update.  This value must
- *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
- *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
- *   for the record in the database.
- * @param $db
- *   The name of the remote database to update.  If this value is provided and
- *   no dbxref then all of the publications currently in the Chado database
- *   for this remote database will be updated.
- *
- * @ingroup tripal_pub_api
- */
-function tripal_reimport_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {
-
-  print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
-      "If the load fails or is terminated prematurely then the entire set of \n" .
-      "insertions/updates is rolled back and will not be found in the database\n\n";
-  $transaction = db_transaction();
-  try {
-
-    // get a list of all publications by their Dbxrefs that have supported databases
-    $sql = "
-      SELECT DB.name as db_name, DBX.accession
-      FROM pub P
-        INNER JOIN pub_dbxref PDBX ON P.pub_id = PDBX.pub_id
-        INNER JOIN dbxref DBX      ON DBX.dbxref_id = PDBX.dbxref_id
-        INNER JOIN db DB           ON DB.db_id = DBX.db_id
-    ";
-    $args = array();
-    if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
-      $dbname = $matches[1];
-      $accession = $matches[2];
-      $sql .= "WHERE DBX.accession = :accession and DB.name = :dbname ";
-      $args[':accession'] = $accession;
-      $args[':dbname'] = $dbname;
-    }
-    elseif ($db) {
-      $sql .= " WHERE DB.name = :dbname ";
-      $args[':dbname'] = $db;
-    }
-    $sql .= "ORDER BY DB.name, P.pub_id";
-    $results = chado_query($sql, $args);
-
-    $num_to_retrieve = 100;
-    $i = 0;                 // count the number of IDs. When we hit $num_to_retrieve we'll do the query
-    $curr_db = '';          // keeps track of the current current database
-    $ids = array();         // the list of IDs for the database
-    $search = array();      // the search array passed to the search function
-
-    // iterate through the pub IDs
-    while ($pub = $results->fetchObject()) {
-      $accession = $pub->accession;
-      $remote_db = $pub->db_name;
-
-      // here we need to only update publications for databases we support
-      $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
-      if(!in_array($remote_db, $supported_dbs)) {
-        continue;
-      }
-      $search = array(
-        'num_criteria' => 1,
-        'remote_db' => $remote_db,
-        'criteria' => array(
-          '1' => array(
-            'search_terms' => "$remote_db:$accession",
-            'scope' => 'id',
-            'operation' => '',
-            'is_phrase' => 0,
-          ),
-        ),
-      );
-      $pubs = tripal_get_remote_pubs($remote_db, $search, 1, 0);
-      tripal_pub_add_publications($pubs, $do_contact, TRUE);
-
-      $i++;
-    }
-
-    // sync the newly added publications with Drupal
-    print "Syncing publications with Drupal...\n";
-    chado_node_sync_records('pub');
-
-    // if the caller wants to create contacts then we should sync them
-    if ($do_contact) {
-      print "Syncing contacts with Drupal...\n";
-      chado_node_sync_records('contact');
-    }
-  }
-  catch (Exception $e) {
-    $transaction->rollback();
-    print "\n"; // make sure we start errors on new line
-    watchdog_exception('T_pub_import', $e);
-    print "FAILED: Rolling back database changes...\n";
-    return;
-  }
-  print "Done.\n";
-}
-
-/**
- * Imports all publications for a given publication import setup.
- *
- * @param $import_id
- *   The ID of the import setup to use
- * @param $job_id
- *   The jobs management job_id for the job if this function is run as a job.
- *
- * @ingroup tripal_pub_api
- */
-function tripal_execute_pub_importer($import_id, $job_id = NULL) {
-  print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
-        "If the load fails or is terminated prematurely then the entire set of \n" .
-        "insertions/updates is rolled back and will not be found in the database\n\n";
-
-  // start the transaction
-  $transaction = db_transaction();
-
-  try {
-    $page = 0;
-    $do_contact = FALSE;
-    $num_to_retrieve = 100;
-
-    // get all of the loaders
-    $args = array(':import_id' => $import_id);
-    $sql = "SELECT * FROM {tripal_pub_import} WHERE pub_import_id = :import_id ";
-    $import = db_query($sql, $args)->fetchObject();
-
-    print "Executing Importer: '" . $import->name . "'\n";
-
-    $criteria = unserialize($import->criteria);
-    $remote_db = $criteria['remote_db'];
-    $total_pubs = 0;
-    do {
-      // retrieve the pubs for this page. We'll retreive 100 at a time
-      $results  = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
-      $pubs     = $results['pubs'];
-      $num_pubs = $rseults['total_records'];
-      $total_pubs += $num_pubs;
-      tripal_pub_add_publications($pubs, $import->do_contact);
-      $page++;
-    }
-    // continue looping until we have a $pubs array that does not have
-    // our requested numer of records.  This means we've hit the end
-    while (count($pubs) == $num_to_retrieve);
-
-    // sync the newly added publications with Drupal. If the user
-    // requested a report then we don't want to print any syncing information
-    // so pass 'FALSE' to the sync call
-    print "Syncing publications with Drupal...\n";
-    chado_node_sync_records('pub');
-
-    // if any of the importers wanted to create contacts from the authors then sync them
-    if($import->do_contact) {
-      print "Syncing contacts with Drupal...\n";
-      chado_node_sync_records('contact');
-    }
-    tripal_set_job_progress($job_id, '100');
-  }
-  catch (Exception $e) {
-    $transaction->rollback();
-    print "\n"; // make sure we start errors on new line
-    watchdog_exception('T_pub_import', $e);
-    print "FAILED: Rolling back database changes...\n";
-    return;
-  }
-  print "Done.\n";
-}
-
-/**
- * Imports all publications for all active import setups.
- *
- * @param $report_email
- *   A list of email address, separated by commas, that should be notified
- *   once importing has completed
- * @param $do_update
- *   If set to TRUE then publications that already exist in the Chado database
- *   will be updated, whereas if FALSE only new publications will be added
- *
- * @ingroup tripal_pub_api
- */
-function tripal_execute_active_pub_importers($report_email = FALSE, $do_update = FALSE) {
-  $num_to_retrieve = 100;
-  $page = 0;
-
-  print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
-      "If the load fails or is terminated prematurely then the entire set of \n" .
-      "insertions/updates is rolled back and will not be found in the database\n\n";
-
-  // start the transaction
-  $transaction = db_transaction();
-
-  try {
-    // get all of the loaders
-    $args = array();
-    $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
-    $results = db_query($sql, $args);
-    $do_contact = FALSE;
-    $reports = array();
-    foreach ($results as $import) {
-      $page = 0;
-      print "Executing importer: '" . $import->name . "'\n";
-      // keep track if any of the importers want to create contacts from authors
-      if ($import->do_contact == 1) {
-        $do_contact = TRUE;
-      }
-      $criteria = unserialize($import->criteria);
-      $remote_db = $criteria['remote_db'];
-      do {
-        // retrieve the pubs for this page. We'll retreive 100 at a time
-        $results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
-        $pubs = $results['pubs'];
-        $reports[$import->name] = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);
-        $page++;
-      }
-      // continue looping until we have a $pubs array that does not have
-      // our requested numer of records.  This means we've hit the end
-      while (count($pubs) == $num_to_retrieve);
-    }
-
-    // sync the newly added publications with Drupal. If the user
-    // requested a report then we don't want to print any syncing information
-    // so pass 'FALSE' to the sync call
-    print "Syncing publications with Drupal...\n";
-    chado_node_sync_records('pub');
-
-    // iterate through each of the reports and generate a final report with HTML links
-    $HTML_report = '';
-    if ($report_email) {
-      $HTML_report .= "<html>";
-      global $base_url;
-      foreach ($reports as $importer => $report) {
-        $total = count($report['inserted']);
-        $HTML_report .= "<b>$total new publications from importer: $importer</b><br><ol>\n";
-        foreach ($report['inserted'] as $pub) {
-          $item = $pub['Title'];
-          if (array_key_exists('pub_id', $pub)) {
-            $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
-          }
-          $HTML_report .= "<li>$item</li>\n";
-        }
-        $HTML_report .= "</ol>\n";
-      }
-      $HTML_report .= "</html>";
-      $site_email = variable_get('site_mail', '');
-      $params = array(
-        'message' => $HTML_report
-      );
-      drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
-    }
-
-    // if any of the importers wanted to create contacts from the authors then sync them
-    if($do_contact) {
-      print "Syncing contacts with Drupal...\n";
-      chado_node_sync_records('contact');
-    }
-  }
-  catch (Exception $e) {
-    $transaction->rollback();
-    print "\n"; // make sure we start errors on new line
-    watchdog_exception('T_pub_import', $e);
-    print "FAILED: Rolling back database changes...\n";
-    return;
-  }
-  print "Done.\n";
-}
-
-/**
- * Imports a singe publication specified by a remote database cross reference.
- *
- * @param $pub_dbxref
- *   The unique database ID for the record to update.  This value must
- *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
- *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
- *   for the record in the database.
- * @param $do_contact
- *   Set to TRUE if authors should automatically have a contact record added
- *   to Chado.
- * @param $do_update
- *   If set to TRUE then the publication will be updated if it already exists
- *   in the database.
- *
- * @ingroup tripal_pub_api
- */
-function tripal_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update) {
-  $num_to_retrieve = 1;
-  $pager_id = 0;
-  $page = 0;
-  $num_pubs = 0;
-
-  print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
-      "If the load fails or is terminated prematurely then the entire set of \n" .
-      "insertions/updates is rolled back and will not be found in the database\n\n";
-
-  $transaction = db_transaction();
-  try {
-    if(preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
-      $dbname = $matches[1];
-      $accession = $matches[2];
-
-      $criteria = array(
-        'num_criteria' => 1,
-        'remote_db' => $dbname,
-        'criteria' => array(
-          '1' => array(
-            'search_terms' => "$dbname:$accession",
-            'scope' => 'id',
-            'operation' => '',
-            'is_phrase' => 0,
-          ),
-        ),
-      );
-      $remote_db = $criteria['remote_db'];
-      $results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
-      $pubs          = $results['pubs'];
-      $search_str    = $results['search_str'];
-      $total_records = $results['total_records'];
-      $pub_id = tripal_pub_add_publications($pubs, $do_contact, $do_update);
-    }
-
-    // sync the newly added publications with Drupal
-    print "Syncing publications with Drupal...\n";
-    chado_node_sync_records('pub');
-
-    // if any of the importers wanted to create contacts from the authors then sync them
-    if($do_contact) {
-      print "Syncing contacts with Drupal...\n";
-      chado_node_sync_records('contact');
-    }
-  }
-  catch (Exception $e) {
-    $transaction->rollback();
-    print "\n"; // make sure we start errors on new line
-    watchdog_exception('T_pub_import', $e);
-    print "FAILED: Rolling back database changes...\n";
-    return;
-  }
-
-  print "Done.\n";
-}

+ 448 - 0
tripal_pub/includes/tripal_pub.pub_importers.inc

@@ -1413,3 +1413,451 @@ function tripal_pub_get_publication_array($pub_id, $skip_existing = TRUE) {
   }
   return $pub_array;
 }
+
+/**
+ * Imports all publications for all active import setups.
+ *
+ * Every record of the tripal_pub_import table that is not disabled is
+ * executed in turn. Remote results are retrieved 100 records at a time and
+ * handed to tripal_pub_add_publications(). All of the work is performed
+ * inside a single database transaction that is rolled back if an exception
+ * is thrown.
+ *
+ * @param $report_email
+ *   A list of email addresses, separated by commas, that should be notified
+ *   once importing has completed. If FALSE (the default) no report is sent.
+ * @param $do_update
+ *   If set to TRUE then publications that already exist in the Chado database
+ *   will be updated, whereas if FALSE only new publications will be added
+ *
+ * @ingroup tripal_pub
+ */
+function tripal_execute_active_pub_importers($report_email = FALSE, $do_update = FALSE) {
+  $num_to_retrieve = 100;
+
+  print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
+      "If the load fails or is terminated prematurely then the entire set of \n" .
+      "insertions/updates is rolled back and will not be found in the database\n\n";
+
+  // start the transaction
+  $transaction = db_transaction();
+
+  try {
+    // get all of the loaders
+    $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
+    $importers = db_query($sql, array());
+    $do_contact = FALSE;
+    $reports = array();
+    foreach ($importers as $import) {
+      $page = 0;
+      print "Executing importer: '" . $import->name . "'\n";
+      // keep track if any of the importers want to create contacts from authors
+      if ($import->do_contact == 1) {
+        $do_contact = TRUE;
+      }
+      $criteria = unserialize($import->criteria);
+      $remote_db = $criteria['remote_db'];
+      $reports[$import->name] = array();
+      do {
+        // retrieve the pubs for this page. We'll retrieve 100 at a time
+        $remote = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
+        $pubs = $remote['pubs'];
+        $page_report = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);
+
+        // accumulate the report of every page rather than keeping only the
+        // last one. NOTE(review): assumes each report entry (e.g. 'inserted')
+        // is a list of publication arrays -- confirm against
+        // tripal_pub_add_publications().
+        if (is_array($page_report)) {
+          foreach ($page_report as $status => $items) {
+            if (isset($reports[$import->name][$status]) and
+                is_array($reports[$import->name][$status]) and is_array($items)) {
+              $reports[$import->name][$status] = array_merge($reports[$import->name][$status], $items);
+            }
+            else {
+              $reports[$import->name][$status] = $items;
+            }
+          }
+        }
+        $page++;
+      }
+      // continue looping until we have a $pubs array that does not have
+      // our requested number of records.  This means we've hit the end
+      while (count($pubs) == $num_to_retrieve);
+    }
+
+    // sync the newly added publications with Drupal
+    print "Syncing publications with Drupal...\n";
+    chado_node_sync_records('pub');
+
+    // iterate through each of the reports and generate a final report with HTML links
+    if ($report_email) {
+      global $base_url;
+      $HTML_report = "<html>";
+      foreach ($reports as $importer => $report) {
+        $inserted = array();
+        if (isset($report['inserted']) and is_array($report['inserted'])) {
+          $inserted = $report['inserted'];
+        }
+        $total = count($inserted);
+        $HTML_report .= "<b>$total new publications from importer: " . check_plain($importer) . "</b><br><ol>\n";
+        foreach ($inserted as $pub) {
+          if (array_key_exists('pub_id', $pub)) {
+            // l() escapes the link text itself
+            $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
+          }
+          else {
+            // titles come from the remote database, escape them for HTML
+            $item = check_plain($pub['Title']);
+          }
+          $HTML_report .= "<li>$item</li>\n";
+        }
+        $HTML_report .= "</ol>\n";
+      }
+      $HTML_report .= "</html>";
+      $site_email = variable_get('site_mail', '');
+      $params = array(
+        'message' => $HTML_report
+      );
+      drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
+    }
+
+    // if any of the importers wanted to create contacts from the authors then sync them
+    if($do_contact) {
+      print "Syncing contacts with Drupal...\n";
+      chado_node_sync_records('contact');
+    }
+  }
+  catch (Exception $e) {
+    $transaction->rollback();
+    print "\n"; // make sure we start errors on new line
+    watchdog_exception('T_pub_import', $e);
+    print "FAILED: Rolling back database changes...\n";
+    return;
+  }
+  print "Done.\n";
+}
+
+/**
+ * Imports all publications for a given publication import setup.
+ *
+ * The import setup is read from the tripal_pub_import table, its remote
+ * database is queried 100 records at a time and every page is handed to
+ * tripal_pub_add_publications(). All of the work is performed inside a
+ * single database transaction that is rolled back if an exception is thrown.
+ *
+ * @param $import_id
+ *   The ID of the import setup to use
+ * @param $job_id
+ *   The jobs management job_id for the job if this function is run as a job.
+ *
+ * @ingroup tripal_pub
+ */
+function tripal_execute_pub_importer($import_id, $job_id = NULL) {
+  print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
+      "If the load fails or is terminated prematurely then the entire set of \n" .
+      "insertions/updates is rolled back and will not be found in the database\n\n";
+
+  // start the transaction
+  $transaction = db_transaction();
+
+  try {
+    $page = 0;
+    $num_to_retrieve = 100;
+
+    // get the requested loader
+    $args = array(':import_id' => $import_id);
+    $sql = "SELECT * FROM {tripal_pub_import} WHERE pub_import_id = :import_id ";
+    $import = db_query($sql, $args)->fetchObject();
+
+    // fetchObject() yields FALSE when no such importer exists; fail through
+    // the regular error path instead of dereferencing a non-object
+    if (!$import) {
+      throw new Exception("Cannot find a publication importer with the ID '$import_id'.");
+    }
+
+    print "Executing Importer: '" . $import->name . "'\n";
+
+    $criteria = unserialize($import->criteria);
+    $remote_db = $criteria['remote_db'];
+    do {
+      // retrieve the pubs for this page. We'll retrieve 100 at a time
+      $results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
+      $pubs    = $results['pubs'];
+      tripal_pub_add_publications($pubs, $import->do_contact);
+      $page++;
+    }
+    // continue looping until we have a $pubs array that does not have
+    // our requested number of records.  This means we've hit the end
+    while (count($pubs) == $num_to_retrieve);
+
+    // sync the newly added publications with Drupal
+    print "Syncing publications with Drupal...\n";
+    chado_node_sync_records('pub');
+
+    // if the importer wanted to create contacts from the authors then sync them
+    if($import->do_contact) {
+      print "Syncing contacts with Drupal...\n";
+      chado_node_sync_records('contact');
+    }
+    tripal_set_job_progress($job_id, '100');
+  }
+  catch (Exception $e) {
+    $transaction->rollback();
+    print "\n"; // make sure we start errors on new line
+    watchdog_exception('T_pub_import', $e);
+    print "FAILED: Rolling back database changes...\n";
+    return;
+  }
+  print "Done.\n";
+}
+
+/**
+ * Retrieves a single publication from one of the supported remote databases.
+ *
+ * The database named in the cross reference must be listed in the
+ * 'tripal_pub_supported_dbs' variable. The record is looked up through
+ * tripal_get_remote_pubs() using a single criterion with the 'id' scope.
+ *
+ * @param $dbxref
+ *   The unique database ID for the record to retrieve.  This value must
+ *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
+ *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
+ *   for the record in the database.
+ *
+ * @return
+ *   Returns the publication array or FALSE if the cross reference is
+ *   malformed, the database is not supported or no record was found.
+ *
+ * @ingroup tripal_pub
+ */
+function tripal_get_remote_pub($dbxref) {
+
+  // the cross reference is split at its first colon
+  if(!preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
+    return FALSE;
+  }
+  $remote_db = $matches[1];
+  $accession = $matches[2];
+
+  // check that the database is supported
+  $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
+  if(!in_array($remote_db, $supported_dbs)) {
+    return FALSE;
+  }
+
+  $search = array(
+    'num_criteria' => 1,
+    'remote_db' => $remote_db,
+    'criteria' => array(
+      '1' => array(
+        'search_terms' => "$remote_db:$accession",
+        'scope' => 'id',
+        'operation' => '',
+        'is_phrase' => 0,
+      ),
+    ),
+  );
+  $results = tripal_get_remote_pubs($remote_db, $search, 1, 0);
+
+  // the remote search may come back empty (unknown accession, or no search
+  // callback for this database); report that as a failure instead of
+  // reading an undefined offset
+  if (!is_array($results) or empty($results['pubs']) or !isset($results['pubs'][0])) {
+    return FALSE;
+  }
+  return $results['pubs'][0];
+}
+
+/**
+ * Searches a remote publication database and returns the matching
+ * publications.
+ *
+ * The search itself is delegated to a callback function named
+ * tripal_pub_remote_search_<remote_db>. If no such function exists an
+ * empty result is returned.
+ *
+ * @param $remote_db
+ *    The name of the remote publication database to query. These names should
+ *    match the name of the databases in the Chado 'db' table. Currently
+ *    supported databases include
+ *      'PMID':  PubMed
+ *
+ * @param $search_array
+ *    An associative array containing the search criteria. The following keys
+ *    are expected
+ *      'remote_db':     Specifies the name of the remote publication database
+ *      'num_criteria':  Specifies the number of criteria present in the search array
+ *      'days':          The number of days to include in the search starting from today
+ *      'criteria':      An associative array containing the search criteria. There should
+ *                       be no less than 'num_criteria' elements in this array.
+ *
+ *    The following keys are expected in the 'criteria' array
+ *      'search_terms':  A list of terms to search on, separated by spaces.
+ *      'scope':         The fields to search in the remote database. Valid values
+ *                       include: 'title', 'abstract', 'author' and 'any'
+ *      'operation':     The logical operation to use for this criteria. Valid
+ *                       values include: 'AND', 'OR' and 'NOT'.
+ * @param $num_to_retrieve
+ *    The number of records to retrieve.  In cases with large numbers of
+ *    records to retrieve, the remote database may limit the size of each
+ *    retrieval.
+ * @param $page
+ *    Optional.  The page of results to retrieve. Defaults to 0.
+ *
+ * @return
+ *   Whatever the database specific callback returns. When no callback exists
+ *   for the database this is an associative array with the keys
+ *   'total_records' (0), 'search_str' (an empty string) and 'pubs' (an empty
+ *   array). Callers in this file read those same three keys from the result.
+ *
+ * @ingroup tripal_pub
+ */
+function tripal_get_remote_pubs($remote_db, $search_array, $num_to_retrieve, $page = 0) {
+
+  // every supported database provides its own search function
+  $search_func = "tripal_pub_remote_search_$remote_db";
+
+  // without a search function there is nothing to query: hand back an
+  // empty result set
+  if (!function_exists($search_func)) {
+    return array(
+      'total_records' => 0,
+      'search_str'    => '',
+      'pubs'          => array(),
+    );
+  }
+
+  return call_user_func($search_func, $search_array, $num_to_retrieve, $page);
+}
+
+/**
+ * Imports a single publication specified by a remote database cross reference.
+ *
+ * The record is retrieved with tripal_get_remote_pubs(), added to Chado with
+ * tripal_pub_add_publications() and then synced with Drupal. All of the work
+ * is performed inside a database transaction that is rolled back if an
+ * exception is thrown.
+ *
+ * @param $pub_dbxref
+ *   The unique database ID for the record to update.  This value must
+ *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
+ *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
+ *   for the record in the database.
+ * @param $do_contact
+ *   Set to TRUE if authors should automatically have a contact record added
+ *   to Chado.
+ * @param $do_update
+ *   If set to TRUE then the publication will be updated if it already exists
+ *   in the database. Defaults to FALSE, the same default used by
+ *   tripal_execute_active_pub_importers().
+ *
+ * @ingroup tripal_pub
+ */
+function tripal_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update = FALSE) {
+  // a cross reference identifies exactly one record
+  $num_to_retrieve = 1;
+  $page = 0;
+
+  print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
+      "If the load fails or is terminated prematurely then the entire set of \n" .
+      "insertions/updates is rolled back and will not be found in the database\n\n";
+
+  $transaction = db_transaction();
+  try {
+    // the cross reference is split at its first colon
+    if(preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
+      $dbname = $matches[1];
+      $accession = $matches[2];
+
+      // a single 'id' scoped criterion that looks up exactly this record
+      $criteria = array(
+        'num_criteria' => 1,
+        'remote_db' => $dbname,
+        'criteria' => array(
+          '1' => array(
+            'search_terms' => "$dbname:$accession",
+            'scope' => 'id',
+            'operation' => '',
+            'is_phrase' => 0,
+          ),
+        ),
+      );
+      $results = tripal_get_remote_pubs($dbname, $criteria, $num_to_retrieve, $page);
+      tripal_pub_add_publications($results['pubs'], $do_contact, $do_update);
+    }
+
+    // sync the newly added publications with Drupal
+    print "Syncing publications with Drupal...\n";
+    chado_node_sync_records('pub');
+
+    // if the caller wanted to create contacts from the authors then sync them
+    if($do_contact) {
+      print "Syncing contacts with Drupal...\n";
+      chado_node_sync_records('contact');
+    }
+  }
+  catch (Exception $e) {
+    $transaction->rollback();
+    print "\n"; // make sure we start errors on new line
+    watchdog_exception('T_pub_import', $e);
+    print "FAILED: Rolling back database changes...\n";
+    return;
+  }
+
+  print "Done.\n";
+}
+
+/**
+ * Updates publication records that currently exist in the Chado pub table
+ * with the most recent data in the remote database.
+ *
+ * Each publication that has a cross reference to a database listed in the
+ * 'tripal_pub_supported_dbs' variable is retrieved again from that database,
+ * one record at a time, and passed to tripal_pub_add_publications() with
+ * updating enabled. All of the work is performed inside a database
+ * transaction that is rolled back if an exception is thrown.
+ *
+ * @param $do_contact
+ *   Set to TRUE if authors should automatically have a contact record added
+ *   to Chado. Contacts are added using the name provided by the remote
+ *   database.
+ * @param $dbxref
+ *   The unique database ID for the record to update.  This value must
+ *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
+ *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
+ *   for the record in the database.
+ * @param $db
+ *   The name of the remote database to update.  If this value is provided and
+ *   no dbxref then all of the publications currently in the Chado database
+ *   for this remote database will be updated.
+ *
+ * @ingroup tripal_pub
+ */
+function tripal_reimport_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {
+
+  print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
+      "If the load fails or is terminated prematurely then the entire set of \n" .
+      "insertions/updates is rolled back and will not be found in the database\n\n";
+  $transaction = db_transaction();
+  try {
+
+    // get a list of all publications by their Dbxrefs
+    $sql = "
+      SELECT DB.name as db_name, DBX.accession
+      FROM pub P
+        INNER JOIN pub_dbxref PDBX ON P.pub_id = PDBX.pub_id
+        INNER JOIN dbxref DBX      ON DBX.dbxref_id = PDBX.dbxref_id
+        INNER JOIN db DB           ON DB.db_id = DBX.db_id
+    ";
+    $args = array();
+    if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
+      // limit the update to the single record that was requested
+      $sql .= "WHERE DBX.accession = :accession and DB.name = :dbname ";
+      $args[':accession'] = $matches[2];
+      $args[':dbname'] = $matches[1];
+    }
+    elseif ($db) {
+      // limit the update to the records of one remote database
+      $sql .= " WHERE DB.name = :dbname ";
+      $args[':dbname'] = $db;
+    }
+    $sql .= "ORDER BY DB.name, P.pub_id";
+    $results = chado_query($sql, $args);
+
+    // here we need to only update publications for databases we support
+    $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
+
+    // iterate through the pub IDs
+    while ($pub = $results->fetchObject()) {
+      $accession = $pub->accession;
+      $remote_db = $pub->db_name;
+
+      if(!in_array($remote_db, $supported_dbs)) {
+        continue;
+      }
+      // a single 'id' scoped criterion that looks up exactly this record
+      $search = array(
+        'num_criteria' => 1,
+        'remote_db' => $remote_db,
+        'criteria' => array(
+          '1' => array(
+            'search_terms' => "$remote_db:$accession",
+            'scope' => 'id',
+            'operation' => '',
+            'is_phrase' => 0,
+          ),
+        ),
+      );
+      // tripal_get_remote_pubs() wraps the publications in an array that
+      // also holds 'total_records' and 'search_str'; only the 'pubs'
+      // element is the list of publications to add
+      $remote = tripal_get_remote_pubs($remote_db, $search, 1, 0);
+      $pubs = (is_array($remote) and isset($remote['pubs'])) ? $remote['pubs'] : array();
+      tripal_pub_add_publications($pubs, $do_contact, TRUE);
+    }
+
+    // sync the newly added publications with Drupal
+    print "Syncing publications with Drupal...\n";
+    chado_node_sync_records('pub');
+
+    // if the caller wants to create contacts then we should sync them
+    if ($do_contact) {
+      print "Syncing contacts with Drupal...\n";
+      chado_node_sync_records('contact');
+    }
+  }
+  catch (Exception $e) {
+    $transaction->rollback();
+    print "\n"; // make sure we start errors on new line
+    watchdog_exception('T_pub_import', $e);
+    print "FAILED: Rolling back database changes...\n";
+    return;
+  }
+  print "Done.\n";
+}

+ 149 - 0
tripal_pub/includes/tripal_pub.pub_search.inc

@@ -532,3 +532,152 @@ function theme_tripal_pub_search_setup_form_elements($variables) {
   $results .= '</div>';
   return $results;
 }
+
+
+/**
+ * Builds the SQL statement needed to search Chado for the publications
+ * that match the user supplied criteria and executes it.  Typically, this
+ * function is called by the search form generated by the
+ * tripal_pub_search_form() function but this function is included in the
+ * API for calling by anyone.
+ *
+ * @param $search_array
+ *   An array of search criteria provided by the user. The search array is
+ *   an associative array with the following keys:
+ *     'num_criteria': an integer indicating the number of search criteria supplied
+ *     'from_year':    filters records by a start year
+ *     'to_year':      filters records by an end year
+ *     'criteria':     an array of criteria, indexed from 1 to 'num_criteria'.
+ *                     Each criteria is an associative array with the
+ *                     following keys:
+ *                     'search_terms':   The text used for searching
+ *                     'scope':          The cvterm_id of the property used for
+ *                                       filtering, or 0 to search any field
+ *                     'operation':      The operation (AND, OR or NOT). Any
+ *                                       other value is treated as AND.
+ *   The year filter is only applied when both 'from_year' and 'to_year' are
+ *   provided.
+ * @param $offset
+ *   The offset for paging records.  The first record returned will be
+ *   at the offset indicated here, and the next $limit number of records
+ *   will be returned.
+ *
+ * @param $limit
+ *   The number of records to retrieve
+ *
+ * @param $total_records
+ *   A value passed by reference. This value will get set to the total
+ *   number of matching records
+ *
+ * @return
+ *   The result of chado_query() for the paged search query.
+ *
+ * @ingroup tripal_pub
+ */
+function tripal_search_publications($search_array, $offset, $limit, &$total_records) {
+
+  // build the SQL based on the criteria provided by the user
+  $select = "SELECT DISTINCT P.*, CP.nid ";
+  $from   = "FROM {pub} P
+               LEFT JOIN public.chado_pub CP on P.pub_id = CP.pub_id
+               INNER JOIN {cvterm} CVT on CVT.cvterm_id = P.type_id
+            ";
+  $where  = "WHERE (NOT P.title = 'null') "; // always exclude the dummy pub
+  $order  = "ORDER BY P.pyear DESC, P.title ASC";
+  $args = array();  // arguments for where clause
+
+  $num_criteria = isset($search_array['num_criteria']) ? $search_array['num_criteria'] : 0;
+  $from_year    = isset($search_array['from_year']) ? $search_array['from_year'] : '';
+  $to_year      = isset($search_array['to_year']) ? $search_array['to_year'] : '';
+
+  for ($i = 1; $i <= $num_criteria; $i++) {
+    $criterion = isset($search_array['criteria'][$i]) ? $search_array['criteria'][$i] : array();
+    $value   = isset($criterion['search_terms']) ? $criterion['search_terms'] : '';
+    $type_id = isset($criterion['scope']) ? $criterion['scope'] : 0;
+    $op      = isset($criterion['operation']) ? $criterion['operation'] : '';
+
+    // skip criteria with no values
+    if(!$value) {
+      continue;
+    }
+
+    // to prevent SQL injection make sure our operator is
+    // what we expect. The operator is the only user supplied text that is
+    // written into the SQL, everything else is passed as an argument
+    if ($op == 'NOT') {
+      $op = 'AND NOT';
+    }
+    elseif ($op != 'AND' and $op != 'OR') {
+      $op = 'AND';
+    }
+
+    // get the scope type
+    $values = array('cvterm_id' => $type_id);
+    $cvterm = chado_select_record('cvterm', array('name'), $values);
+    $type_name = '';
+    if (count($cvterm) > 0) {
+      $type_name = $cvterm[0]->name;
+    }
+    if ($type_name == 'Title') {
+      $where .= " $op (lower(P.title) LIKE lower(:crit$i)) ";
+      $args[":crit$i"] = '%' . $value . '%';
+    }
+    // year, volume and issue are compared for equality, so the search
+    // term is passed as it is, without LIKE wildcards
+    elseif ($type_name == 'Year') {
+      $where .= " $op (lower(P.pyear) = lower(:crit$i)) ";
+      $args[":crit$i"] = $value;
+    }
+    elseif ($type_name == 'Volume') {
+      $where .= " $op (lower(P.volume) = lower(:crit$i)) ";
+      $args[":crit$i"] = $value;
+    }
+    elseif ($type_name == 'Issue') {
+      $where .= " $op (lower(P.issue) = lower(:crit$i)) ";
+      $args[":crit$i"] = $value;
+    }
+    // for journal and conference names the property is joined by its
+    // type and the search term is compared with the series name of the
+    // publication or with the value of the property
+    elseif ($type_name == 'Journal Name') {
+      $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id AND PP$i.type_id = :type_id$i ";
+      $where .= " $op ((lower(P.series_name) = lower(:crit$i) and CVT.name = 'Journal Article') OR
+      (lower(PP$i.value) = lower(:crit$i))) ";
+      $args[":crit$i"] = $value;
+      $args[":type_id$i"] = $type_id;
+    }
+    elseif ($type_name == 'Conference Name') {
+      $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id AND PP$i.type_id = :type_id$i ";
+      $where .= " $op ((lower(P.series_name) = lower(:crit$i) and CVT.name = 'Conference Proceedings') OR
+      (lower(PP$i.value) = lower(:crit$i))) ";
+      $args[":crit$i"] = $value;
+      $args[":type_id$i"] = $type_id;
+    }
+    elseif ($type_name == 'Publication Type') {
+      $where .= " $op (lower(CVT.name) = lower(:crit$i))";
+      $args[":crit$i"] = $value;
+    }
+    elseif ($type_id == 0) { //'Any Field'
+      $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id ";
+      $where .= " $op (lower(PP$i.value)  LIKE lower(:crit$i) OR
+      lower(P.title) LIKE lower(:crit$i) OR
+      lower(P.volumetitle) LIKE lower(:crit$i) OR
+      lower(P.publisher) LIKE lower(:crit$i) OR
+      lower(P.uniquename) LIKE lower(:crit$i) OR
+      lower(P.pubplace) LIKE lower(:crit$i) OR
+      lower(P.miniref) LIKE lower(:crit$i) OR
+      lower(P.series_name) LIKE lower(:crit$i)) ";
+      $args[":crit$i"] = '%' . $value . '%';
+    }
+    // for all other properties
+    else {
+      $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id AND PP$i.type_id = :type_id$i ";
+      $where .= " $op (lower(PP$i.value) LIKE lower(:crit$i)) ";
+      $args[":crit$i"] = '%' . $value . '%';
+      $args[":type_id$i"] = $type_id;
+    }
+  }
+  // the year filter only considers publications with a four character year
+  if($from_year and $to_year) {
+    $where .= " AND (P.pyear ~ '....' AND to_number(P.pyear,'9999') >= :from_year AND to_number(P.pyear,'9999') <= :to_year) ";
+    $args[":from_year"] = $from_year;
+    $args[":to_year"] = $to_year;
+  }
+  $sql = "$select $from $where $order  LIMIT " . (int) $limit . ' OFFSET ' . (int) $offset;
+  // the ordering has no effect on the number of records so it is left out
+  $count = "SELECT count(*) FROM ($select $from $where) as t1";
+
+  // first get the total number of matches
+  $total_records = chado_query($count, $args)->fetchField();
+  $results = chado_query($sql, $args);
+
+  return $results;
+}