|
@@ -0,0 +1,430 @@
|
|
|
+<?php
|
|
|
+
|
|
|
+/**
|
|
|
+ * @file
|
|
|
+ * @todo Add file header description
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
+# This script can be run as a stand-alone script to sync all the stocks from chado to drupal
|
|
|
+// Parameter f specifies the stock_id to sync
|
|
|
+// -f 0 will sync all stocks
|
|
|
+
|
|
|
+$arguments = getopt("f:t:");
|
|
|
+
|
|
|
+if (isset($arguments['f']) and isset($arguments['t']) and $arguments['t'] == 'chado_stock') {
|
|
|
+ $drupal_base_url = parse_url('http://www.example.com');
|
|
|
+ $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
|
|
|
+ $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
|
|
|
+ $_SERVER['REMOTE_ADDR'] = NULL;
|
|
|
+ $_SERVER['REQUEST_METHOD'] = NULL;
|
|
|
+
|
|
|
+ require_once 'includes/bootstrap.inc';
|
|
|
+ drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);
|
|
|
+
|
|
|
+ $stock_id = $arguments['f'];
|
|
|
+
|
|
|
+ if ($stock_id > 0 ) {
|
|
|
+ tripal_stock_sync_stock($stock_id);
|
|
|
+ }
|
|
|
+ else{
|
|
|
+ print "syncing all stocks...\n";
|
|
|
+ tripal_stock_sync_stocks();
|
|
|
+ }
|
|
|
+}
|
|
|
+/**
|
|
|
+ *
|
|
|
+ */
|
|
|
+function tripal_stock_sync_form() {
|
|
|
+
|
|
|
+ $form['description'] = array(
|
|
|
+ '#type' => 'item',
|
|
|
+ '#value' => t("Stocks of the types listed ".
|
|
|
+ "below in the Stock Types box will be synced (leave blank to sync all types). You may limit the ".
|
|
|
+ "stocks to be synced by a specific organism. Depending on the ".
|
|
|
+ "number of stocks in the chado database this may take a long ".
|
|
|
+ "time to complete. "),
|
|
|
+ );
|
|
|
+
|
|
|
+ $form['stock_types'] = array(
|
|
|
+ '#title' => t('Stock Types'),
|
|
|
+ '#type' => 'textarea',
|
|
|
+ '#description' => t("Enter the names of the stock types to sync. " .
|
|
|
+ "Leave blank to sync all stocks. Pages for these stock ".
|
|
|
+ "types will be created automatically for stocks that exist in the ".
|
|
|
+ "chado database. The names listed here should be spearated by ".
|
|
|
+ "spaces or entered separately on new lines. The names must match ".
|
|
|
+ "exactly (spelling and case) with terms in the sequence ontology"),
|
|
|
+ '#default_value' => variable_get('chado_sync_stock_types', ''),
|
|
|
+ );
|
|
|
+
|
|
|
+ // get the list of organisms
|
|
|
+ $sql = "SELECT * FROM {organism} ORDER BY genus, species";
|
|
|
+ $orgs = tripal_organism_get_synced();
|
|
|
+ $organisms[] = '';
|
|
|
+ foreach ($orgs as $organism) {
|
|
|
+ $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
|
|
|
+ }
|
|
|
+ $form['organism_id'] = array(
|
|
|
+ '#title' => t('Organism'),
|
|
|
+ '#type' => t('select'),
|
|
|
+ '#description' => t("Choose the organism for which stocks types set above will be synced. Only organisms which also have been synced will appear in this list."),
|
|
|
+ '#options' => $organisms,
|
|
|
+ );
|
|
|
+
|
|
|
+
|
|
|
+ $form['button'] = array(
|
|
|
+ '#type' => 'submit',
|
|
|
+ '#value' => t('Sync all Stocks'),
|
|
|
+ '#weight' => 3,
|
|
|
+ );
|
|
|
+
|
|
|
+ return $form;
|
|
|
+}
|
|
|
+/**
|
|
|
+ *
|
|
|
+ */
|
|
|
+function tripal_stock_sync_form_validate($form, &$form_state) {
|
|
|
+ $organism_id = $form_state['values']['organism_id'];
|
|
|
+ $stock_types = $form_state['values']['stock_types'];
|
|
|
+
|
|
|
+ // nothing to do
|
|
|
+}
|
|
|
+/**
|
|
|
+ *
|
|
|
+ */
|
|
|
+function tripal_stock_sync_form_submit($form, &$form_state) {
|
|
|
+
|
|
|
+ global $user;
|
|
|
+
|
|
|
+ $organism_id = $form_state['values']['organism_id'];
|
|
|
+ $stock_types = $form_state['values']['stock_types'];
|
|
|
+
|
|
|
+ $job_args = array(0, $organism_id, $stock_types);
|
|
|
+
|
|
|
+ if ($organism_id) {
|
|
|
+ $organism = tripal_core_chado_select('organism', array('genus', 'species'), array('organism_id' => $organism_id));
|
|
|
+ $title = "Sync stocks for " . $organism[0]->genus . " " . $organism[0]->species;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $title = 'Sync stocks';
|
|
|
+ }
|
|
|
+
|
|
|
+ variable_set('chado_sync_stock_types', $stock_types);
|
|
|
+
|
|
|
+ tripal_add_job($title, 'tripal_stock', 'tripal_stock_sync_stocks', $job_args, $user->uid);
|
|
|
+}
|
|
|
+/**
|
|
|
+ *
|
|
|
+ */
|
|
|
+function tripal_stock_set_urls($job_id = NULL) {
|
|
|
+
|
|
|
+ // first get the list of stocks that have been synced
|
|
|
+ $sql = "SELECT * FROM {chado_stock}";
|
|
|
+ $nodes = db_query($sql);
|
|
|
+ while ($node = db_fetch_object($nodes)) {
|
|
|
+
|
|
|
+ print "Setting URL alias for stock $stock->name: node/$node->nid => $url_alias\n";
|
|
|
+
|
|
|
+ // remove any previous alias
|
|
|
+ db_query("DELETE FROM {url_alias} WHERE src = '%s'", "node/$node->nid");
|
|
|
+
|
|
|
+ // add the new alias
|
|
|
+ $url_alias = tripal_stock_get_stock_url($node);
|
|
|
+ path_set_alias("node/$node->nid", $url_alias);
|
|
|
+ }
|
|
|
+}
|
|
|
+/**
|
|
|
+ *
|
|
|
+ */
|
|
|
+function tripal_stock_get_stock_url($node) {
|
|
|
+
|
|
|
+ // determine which URL alias to use
|
|
|
+ $alias_type = variable_get('chado_stock_url', 'internal ID');
|
|
|
+ $aprefix = variable_get('chado_stock_accession_prefix', 'SID');
|
|
|
+
|
|
|
+ // get the stock
|
|
|
+ $values = array('stock_id' => $node->stock_id);
|
|
|
+ $stock = tripal_core_chado_select('stock', array('*'), $values);
|
|
|
+
|
|
|
+ $stock = (object) $stock[0];
|
|
|
+
|
|
|
+ // get the organism
|
|
|
+ $values = array('organism_id' => $stock->organism_id);
|
|
|
+ $organism = tripal_core_chado_select('organism', array('*'), $values);
|
|
|
+ $genus = preg_replace('/\s/', '_', strtolower($organism[0]->genus));
|
|
|
+ $species = preg_replace('/\s/', '_', strtolower($organism[0]->species));
|
|
|
+
|
|
|
+ // get the type
|
|
|
+ $values = array('cvterm_id' => $stock->type_id);
|
|
|
+ $cvterm = tripal_core_chado_select('cvterm', array('name'), $values);
|
|
|
+ $type = preg_replace('/\s/', '_', $cvterm[0]->name);
|
|
|
+
|
|
|
+ switch ($alias_type) {
|
|
|
+ case 'stock_name':
|
|
|
+ $url_alias = $stock->name;
|
|
|
+ break;
|
|
|
+ case 'stock_unique_name':
|
|
|
+ $url_alias = $stock->uniquename;
|
|
|
+ break;
|
|
|
+ case 'genus_species_uqname':
|
|
|
+ $url_alias = $genus . "/" . $genus . "_" . $species . "/" . $stock->uniquename;
|
|
|
+ break;
|
|
|
+ case 'genus_species_name':
|
|
|
+ $url_alias = $genus . "/" . $genus . "_" . $species . "/" . $stock->sname;
|
|
|
+ break;
|
|
|
+ case 'genus_species_type_uname':
|
|
|
+ $url_alias = $genus . "/" . $genus . "_" . $species . "/" . $type . "/" . $stock->uniquename;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ $url_alias = "$aprefix$node->stock_id";
|
|
|
+ }
|
|
|
+
|
|
|
+ return $url_alias;
|
|
|
+}
|
|
|
+/**
|
|
|
+ *
|
|
|
+ *
|
|
|
+ * @ingroup tripal_stock
|
|
|
+ */
|
|
|
+function tripal_stock_sync_stocks($max_sync = 0, $organism_id = NULL,
|
|
|
+ $stock_types = NULL, $job_id = NULL) {
|
|
|
+ //print "Syncing stocks (max of $max_sync)\n";
|
|
|
+ $i = 0;
|
|
|
+
|
|
|
+ // get the list of available sequence ontology terms for which
|
|
|
+ // we will build drupal pages from stocks in chado. If a stock
|
|
|
+ // is not one of the specified typse we won't build a node for it.
|
|
|
+ if (!$stock_types) {
|
|
|
+ $allowed_types = variable_get('chado_sync_stock_types', '');
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $allowed_types = $stock_types;
|
|
|
+ }
|
|
|
+
|
|
|
+ if ($allowed_types) {
|
|
|
+ $allowed_types = preg_replace("/[\s\n\r]+/", " ", $allowed_types);
|
|
|
+ print "Looking for stocks of type: $allowed_types\n";
|
|
|
+
|
|
|
+ $so_terms = split(' ', $allowed_types);
|
|
|
+ $where_cvt = "";
|
|
|
+ foreach ($so_terms as $term) {
|
|
|
+ $where_cvt .= "CVT.name = '$term' OR ";
|
|
|
+ }
|
|
|
+ $where_cvt = drupal_substr($where_cvt, 0, drupal_strlen($where_cvt)-3); # strip trailing 'OR'
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $where_cvt = '1=1';
|
|
|
+ }
|
|
|
+
|
|
|
+ // get the list of organisms that are synced and only include stocks from
|
|
|
+ // those organisms
|
|
|
+ $orgs = tripal_organism_get_synced();
|
|
|
+ $where_org = "";
|
|
|
+ foreach ($orgs as $org) {
|
|
|
+ if ($organism_id) {
|
|
|
+ if ($org->organism_id and $org->organism_id == $organism_id) {
|
|
|
+ $where_org .= "S.organism_id = $org->organism_id OR ";
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ if ($org->organism_id) {
|
|
|
+ $where_org .= "S.organism_id = $org->organism_id OR ";
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ $where_org = drupal_substr($where_org, 0, drupal_strlen($where_org)-3); # strip trailing 'OR'
|
|
|
+
|
|
|
+ // use this SQL statement to get the stocks that we're going to upload
|
|
|
+ $sql = "SELECT stock_id ".
|
|
|
+ "FROM {stock} S ".
|
|
|
+ " INNER JOIN {cvterm} CVT ON S.type_id = CVT.cvterm_id ".
|
|
|
+ "WHERE ($where_cvt) AND ($where_org) ".
|
|
|
+ "ORDER BY stock_id";
|
|
|
+
|
|
|
+ // get the list of stocks
|
|
|
+ $results = chado_query($sql);
|
|
|
+
|
|
|
+ // load into ids array
|
|
|
+ $count = 0;
|
|
|
+ $ids = array();
|
|
|
+ while ($id = db_fetch_object($results)) {
|
|
|
+ $ids[$count] = $id->stock_id;
|
|
|
+ $count++;
|
|
|
+ }
|
|
|
+
|
|
|
+ // make sure our vocabularies are set before proceeding
|
|
|
+// tripal_stock_set_vocabulary();
|
|
|
+
|
|
|
+ // pre-create the SQL statement that will be used to check
|
|
|
+ // if a stock has already been synced. We skip stocks
|
|
|
+ // that have been synced
|
|
|
+ $sql = "SELECT * FROM {chado_stock} WHERE stock_id = %d";
|
|
|
+
|
|
|
+ // Iterate through stocks that need to be synced
|
|
|
+ $interval = intval($count * 0.01);
|
|
|
+ if ($interval < 1) {
|
|
|
+ $interval = 1;
|
|
|
+ }
|
|
|
+ $num_ids = sizeof($ids);
|
|
|
+ $i = 0;
|
|
|
+ foreach ($ids as $stock_id) {
|
|
|
+ // update the job status every 1% stocks
|
|
|
+ if ($job_id and $i % $interval == 0) {
|
|
|
+ tripal_job_set_progress($job_id, intval(($i/$count)*100));
|
|
|
+ }
|
|
|
+ // if we have a maximum number to sync then stop when we get there
|
|
|
+ // if not then just continue on
|
|
|
+ if ($max_sync and $i == $max_sync) {
|
|
|
+ return '';
|
|
|
+ }
|
|
|
+ if (!db_fetch_object(db_query($sql, $stock_id))) {
|
|
|
+
|
|
|
+ # parsing all the stocks can cause memory overruns
|
|
|
+ # we are not sure why PHP does not clean up the memory as it goes
|
|
|
+ # to avoid this problem we will call this script through an
|
|
|
+ # independent system call
|
|
|
+ print ($i + 1) . " of $num_ids Syncing stock id: $stock_id\n";
|
|
|
+ $cmd = "php " . drupal_get_path('module', 'tripal_stock') . "/includes/tripal_stock.sync_stocks.inc -f $stock_id -t chado_stock";
|
|
|
+ print "$cmd\n";
|
|
|
+ system($cmd);
|
|
|
+
|
|
|
+ }
|
|
|
+ $i++;
|
|
|
+ }
|
|
|
+
|
|
|
+ return '';
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ *
|
|
|
+ *
|
|
|
+ * @ingroup tripal_stock
|
|
|
+ */
|
|
|
+function tripal_stock_sync_stock($stock_id) {
|
|
|
+
|
|
|
+print "\tSyncing stock $stock_id\n";
|
|
|
+
|
|
|
+global $user;
|
|
|
+ $create_node = 1; // set to 0 if the node exists and we just sync and not create
|
|
|
+
|
|
|
+ // get the accession prefix
|
|
|
+ $aprefix = variable_get('chado_stock_accession_prefix', 'SID');
|
|
|
+
|
|
|
+ // if we don't have a stock_id then return
|
|
|
+ if (!$stock_id) {
|
|
|
+ drupal_set_message(t("Please provide a stock_id to sync"));
|
|
|
+ return '';
|
|
|
+ }
|
|
|
+
|
|
|
+ // get information about this stock
|
|
|
+ $fsql = "SELECT S.*, O.genus, O.species,CVT.name as cvname ".
|
|
|
+ "FROM {stock} S ".
|
|
|
+ " INNER JOIN {cvterm} CVT ON S.type_id = CVT.cvterm_id ".
|
|
|
+ " INNER JOIN {organism} O ON S.organism_id = O.organism_ID ".
|
|
|
+ "WHERE S.stock_id = %d";
|
|
|
+ $stock = db_fetch_object(chado_query($fsql, $stock_id));
|
|
|
+
|
|
|
+ /*
|
|
|
+ // get the synonyms for this stock
|
|
|
+ $synsql = "SELECT S.name ".
|
|
|
+ "FROM {stock_synonym} SS ".
|
|
|
+ " INNER JOIN {synonym} S on SS.synonym_id = S.synonym_id ".
|
|
|
+ "WHERE SS.stock_id = %d";
|
|
|
+ $synonyms = chado_query($synsql, $stock_id);
|
|
|
+
|
|
|
+ // now add these synonyms to the stock object as a single string
|
|
|
+ $synstring = '';
|
|
|
+ while ($synonym = db_fetch_object($synonyms)) {
|
|
|
+ $synstring .= "$synonym->name\n";
|
|
|
+ }
|
|
|
+ $stock->synonyms = $synstring;
|
|
|
+ */
|
|
|
+
|
|
|
+ // check to make sure that we don't have any nodes with this stock name as a title
|
|
|
+ // but without a corresponding entry in the chado_stock table if so then we want to
|
|
|
+ // clean up that node. (If a node is found we don't know if it belongs to our stock or
|
|
|
+ // not since stocks can have the same name/title.)
|
|
|
+ $tsql = "SELECT * FROM {node} N ".
|
|
|
+ "WHERE title = '%s'";
|
|
|
+ $cnsql = "SELECT * FROM {chado_stock} ".
|
|
|
+ "WHERE nid = %d";
|
|
|
+ $nodes = db_query($tsql, $stock->name);
|
|
|
+ // cycle through all nodes that may have this title
|
|
|
+ while ($node = db_fetch_object($nodes)) {
|
|
|
+ $stock_nid = db_fetch_object(db_query($cnsql, $node->nid));
|
|
|
+ if (!$stock_nid) {
|
|
|
+ drupal_set_message(t("%stock_id: A node is present but the chado_stock entry is missing... correcting", array('%stock_id' => $stock_id)));
|
|
|
+ node_delete($node->nid);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // check if this stock already exists in the chado_stock table.
|
|
|
+ // if we have a chado stock, we want to check to see if we have a node
|
|
|
+ $cfsql = "SELECT * FROM {chado_stock} ".
|
|
|
+ "WHERE stock_id = %d";
|
|
|
+ $nsql = "SELECT * FROM {node} N ".
|
|
|
+ "WHERE nid = %d";
|
|
|
+ $chado_stock = db_fetch_object(db_query($cfsql, $stock->stock_id));
|
|
|
+ if ($chado_stock) {
|
|
|
+ drupal_set_message(t("%stock_id: A chado_stock entry exists", array('%stock_id' => $stock_id)));
|
|
|
+ $node = db_fetch_object(db_query($nsql, $chado_stock->nid));
|
|
|
+ if (!$node) {
|
|
|
+ // if we have a chado_stock but not a node then we have a problem and
|
|
|
+ // need to cleanup
|
|
|
+ drupal_set_message(t("%stock_id: The node is missing, but has a chado_stock entry... correcting", array('%stock_id' => $stock_id)));
|
|
|
+ $df_sql = "DELETE FROM {chado_stock} WHERE stock_id = %d";
|
|
|
+ db_query($df_sql, $stock_id);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ drupal_set_message(t("%stock_id: A corresponding node exists", array('%stock_id' => $stock_id)));
|
|
|
+ $create_node = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // if we've encountered an error then just return.
|
|
|
+ if ($error_msg = db_error()) {
|
|
|
+ //print "$error_msg\n";
|
|
|
+ return '';
|
|
|
+ }
|
|
|
+
|
|
|
+ // if a drupal node does not exist for this stock then we want to
|
|
|
+ // create one. Note that the node_save call in this block
|
|
|
+ // will call the hook_submit function which
|
|
|
+ if ($create_node) {
|
|
|
+ // get the organism for this stock
|
|
|
+ $sql = "SELECT * FROM {organism} WHERE organism_id = %d";
|
|
|
+ $organism = db_fetch_object(chado_query($sql, $stock->organism_id));
|
|
|
+
|
|
|
+ drupal_set_message(t("%stock_id: Creating node $stock->name", array('%stock_id' => $stock_id)));
|
|
|
+ $new_node = new stdClass();
|
|
|
+ $new_node->type = 'chado_stock';
|
|
|
+ $new_node->uid = $user->uid;
|
|
|
+ $new_node->title = "$stock->name, $stock->uniquename ($stock->cvname) $organism->genus $organism->species";
|
|
|
+ $new_node->sname = "$stock->name";
|
|
|
+ $new_node->uniquename = "$stock->uniquename";
|
|
|
+ $new_node->type_id = $stock->type_id;
|
|
|
+ $new_node->organism_id = $stock->organism_id;
|
|
|
+ $new_node->stock_id = $stock->stock_id;
|
|
|
+ $new_node->chado_stock_exists = TRUE;
|
|
|
+
|
|
|
+ // validate the node and if okay then submit
|
|
|
+ node_validate($new_node);
|
|
|
+ if ($errors = form_get_errors()) {
|
|
|
+ print "Error encountered validating new node. Cannot sync: $msg\n";
|
|
|
+ foreach ($errors as $key => $msg) {
|
|
|
+ watchdog('trp-fsync', "%msg", array('%msg' => $msg), 'error');
|
|
|
+ }
|
|
|
+ exit;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $node = node_submit($new_node);
|
|
|
+ node_save($node);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $node = $chado_stock;
|
|
|
+ }
|
|
|
+
|
|
|
+ return '';
|
|
|
+}
|