12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929 |
- <?php
-
/**
 * Dispatches a publication search to the handler of a remote database.
 *
 * The handler is resolved by name as "tripal_pub_remote_search_<remote_db>"
 * and receives the search array, the page size and the pager id.
 *
 * @param $remote_db
 *   Name of the remote database; used as the suffix of the handler function.
 * @param $search_array
 *   Search criteria, handed to the handler untouched.
 * @param $num_to_retrieve
 *   Number of records requested, handed to the handler untouched.
 * @param $pager_id
 *   Pager identifier handed to the handler.
 * @param $page
 *   When this is a positive integer it is written to $_GET['page'] before
 *   the handler is called; any other value leaves $_GET untouched.
 *
 * @return
 *   Whatever the handler returns, or an empty array when no handler exists.
 */
function tripal_pub_get_remote_search_results($remote_db, $search_array,
  $num_to_retrieve, $pager_id = 0, $page = 0) {

  // Only an explicit, positive integer page overrides the request's page.
  if (is_int($page) && $page > 0) {
    $_GET['page'] = $page;
  }

  $handler = 'tripal_pub_remote_search_' . $remote_db;
  if (!function_exists($handler)) {
    return array();
  }

  return call_user_func($handler, $search_array, $num_to_retrieve, $pager_id);
}
/**
 * Fetches the raw remote record of a publication and wraps it in a textarea.
 *
 * @param $dbxref
 *   Database cross reference in the form "DB:accession".
 *
 * @return
 *   HTML for a textarea holding the raw record (empty when the remote search
 *   returned nothing), or a short message when the dbxref is malformed or
 *   names a database missing from the 'tripal_pub_supported_dbs' variable.
 */
function tripal_pub_get_raw_data($dbxref) {

  if (!preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
    return 'Invalid DB xref';
  }
  $remote_db = $matches[1];
  $accession = $matches[2];

  // Only query databases that have been registered as supported.
  $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
  if (!in_array($remote_db, $supported_dbs)) {
    // The dbxref is caller-supplied, so escape it before it reaches HTML.
    return 'Unsupported database: ' . htmlspecialchars($dbxref, ENT_QUOTES, 'UTF-8');
  }

  $search = array(
    'num_criteria' => 1,
    'remote_db' => $remote_db,
    'criteria' => array(
      '1' => array(
        'search_terms' => "$remote_db:$accession",
        'scope' => 'id',
        'operation' => '',
        'is_phrase' => 0,
      ),
    ),
  );
  $pubs = tripal_pub_get_remote_search_results($remote_db, $search, 1, 0);

  // The search may legitimately return nothing; show an empty textarea then
  // instead of reading an undefined offset.
  $raw = '';
  if (is_array($pubs) && isset($pubs[0]['raw'])) {
    $raw = $pubs[0]['raw'];
  }

  // Textarea content is parsed for entities, so the record must be escaped
  // to be displayed verbatim (and so a "</textarea>" inside it cannot close
  // the element early).
  return '<textarea cols=80 rows=20>' . htmlspecialchars($raw, ENT_QUOTES, 'UTF-8') . '</textarea>';
}
/**
 * Re-fetches publications already stored in Chado from their remote
 * databases and updates the local records.
 *
 * All work happens between tripal_db_start_transaction() and
 * tripal_db_commit_transaction(); afterwards publications (and optionally
 * contacts) are synced with Drupal. Progress is printed to standard output.
 *
 * @param $do_contact
 *   Passed on to tripal_pub_add_publications(); when TRUE contacts are also
 *   synced with Drupal at the end.
 * @param $dbxref
 *   Optional "DB:accession" string restricting the update to one record.
 * @param $db
 *   Optional database name restricting the update to that database. Ignored
 *   when $dbxref is given and well formed.
 */
function tripal_pub_update_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {

  $connection = tripal_db_persistent_chado();
  if (!$connection) {
    print "A persistant connection was not obtained. Loading will be slow\n";
  }

  tripal_db_start_transaction();
  if ($connection) {
    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";
  }

  // Select every publication that carries a database cross reference.
  $sql = "
    SELECT DB.name as db_name, DBX.accession
    FROM pub P
      INNER JOIN pub_dbxref PDBX ON P.pub_id = PDBX.pub_id
      INNER JOIN dbxref DBX      ON DBX.dbxref_id = PDBX.dbxref_id
      INNER JOIN db DB           ON DB.db_id = DBX.db_id
  ";
  $args = array();
  if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
    $dbname = $matches[1];
    $accession = $matches[2];
    $sql .= "WHERE DBX.accession = '%s' and DB.name = '%s' ";
    $args[] = $accession;
    $args[] = $dbname;
  }
  elseif ($db) {
    $sql .= " WHERE DB.name = '%s' ";
    $args[] = $db;
  }
  $sql .= "ORDER BY DB.name, P.pub_id";
  $results = chado_query($sql, $args);

  // The list of supported databases does not change while looping, so it
  // is read once up front.
  $supported_dbs = variable_get('tripal_pub_supported_dbs', array());

  while ($pub = db_fetch_object($results)) {
    $accession = $pub->accession;
    $remote_db = $pub->db_name;

    // Records from databases without a registered remote search are skipped.
    if (!in_array($remote_db, $supported_dbs)) {
      continue;
    }

    $search = array(
      'num_criteria' => 1,
      'remote_db' => $remote_db,
      'criteria' => array(
        '1' => array(
          'search_terms' => "$remote_db:$accession",
          'scope' => 'id',
          'operation' => '',
          'is_phrase' => 0,
        ),
      ),
    );
    $pubs = tripal_pub_get_remote_search_results($remote_db, $search, 1, 0);
    tripal_pub_add_publications($pubs, $do_contact, TRUE);
  }

  tripal_db_commit_transaction();

  print "Transaction Complete\n";

  print "Syncing publications with Drupal...\n";
  tripal_pub_sync_pubs();

  if ($do_contact) {
    print "Syncing contacts with Drupal...\n";
    tripal_contact_sync_contacts();
  }

  print "Done.\n";
}
/**
 * Runs every enabled publication importer and loads what it finds.
 *
 * Importers are read from {tripal_pub_import} (rows with disabled = 0).
 * Each importer is paged through in batches until a batch comes back
 * smaller than the batch size. Everything is loaded between
 * tripal_db_start_transaction() and tripal_db_commit_transaction(), then
 * publications (and contacts, if any importer asked for them) are synced
 * with Drupal. Progress is printed to standard output.
 *
 * @param $report_email
 *   Optional recipient address. When given, an HTML report listing the
 *   publications inserted per importer is sent via drupal_mail().
 */
function tripal_pub_import_publications($report_email = FALSE) {
  $num_to_retrieve = 100;
  $pager_id = 0;

  $connection = tripal_db_persistent_chado();
  if (!$connection) {
    print "A persistant connection was not obtained. Loading will be slow\n";
  }

  tripal_db_start_transaction();
  if ($connection) {
    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";
  }

  $args = array();
  $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
  $results = db_query($sql, $args);
  $do_contact = FALSE;
  $reports = array();
  while ($import = db_fetch_object($results)) {
    print "Importing: " . $import->name . "\n";

    // Remember whether any importer wants contacts created from authors.
    if ($import->do_contact == 1) {
      $do_contact = TRUE;
    }

    // NOTE(review): unserialize() runs on stored importer criteria; this is
    // only safe as long as that table is written by trusted code.
    $criteria = unserialize($import->criteria);
    $remote_db = $criteria['remote_db'];

    // Every importer must start from its own first page. A page of 0 is not
    // written to $_GET by tripal_pub_get_remote_search_results(), so the
    // value left behind by the previous importer is cleared explicitly.
    // NOTE(review): assumes the remote search handlers take the page from
    // $_GET['page'] - confirm against the handlers.
    $page = 0;
    unset($_GET['page']);

    // Accumulate the report over all pages instead of keeping only the
    // report of the last page.
    $totals = array('error' => 0, 'inserted' => array(), 'skipped' => array());
    do {
      $pubs = tripal_pub_get_remote_search_results($remote_db, $criteria, $num_to_retrieve, $pager_id, $page);
      $page_report = tripal_pub_add_publications($pubs, $import->do_contact);
      if (is_array($page_report)) {
        foreach ($page_report as $key => $value) {
          if (is_array($value)) {
            $so_far = (isset($totals[$key]) && is_array($totals[$key])) ? $totals[$key] : array();
            $totals[$key] = array_merge($so_far, $value);
          }
          else {
            $totals[$key] = (isset($totals[$key]) ? $totals[$key] : 0) + $value;
          }
        }
      }
      $page++;
    }
    // A full batch means there may be more; a short batch is the last one.
    while (is_array($pubs) && count($pubs) == $num_to_retrieve);

    $reports[$import->name] = $totals;
  }

  tripal_db_commit_transaction();

  print "Transaction Complete\n";

  print "Syncing publications with Drupal...\n";
  tripal_pub_sync_pubs();

  // Mail the report of newly inserted publications, if requested.
  $HTML_report = '';
  if ($report_email) {
    $HTML_report .= "<html>";
    global $base_url;
    foreach ($reports as $importer => $report) {
      $total = count($report['inserted']);
      $HTML_report .= "<b>$total new publications from importer: $importer</b><br><ol>\n";
      foreach ($report['inserted'] as $pub) {
        $pub_title = isset($pub['Title']) ? $pub['Title'] : '';
        if (!empty($pub['pub_id'])) {
          $item = l($pub_title, "$base_url/pub/" . $pub['pub_id']);
        }
        else {
          // Without a link the title goes into the HTML as-is, so escape it.
          $item = htmlspecialchars($pub_title, ENT_QUOTES, 'UTF-8');
        }
        $HTML_report .= "<li>$item</li>\n";
      }
      $HTML_report .= "</ol>\n";
    }
    $HTML_report .= "</html>";
    $site_email = variable_get('site_mail', '');
    $params = array(
      'message' => $HTML_report
    );
    drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
  }

  if ($do_contact) {
    print "Syncing contacts with Drupal...\n";
    tripal_contact_sync_contacts();
  }

  print "Done.\n";
}
/**
 * Imports a single publication identified by its database cross reference.
 *
 * The remote database named in the dbxref is searched for the accession and
 * the result is loaded inside a database transaction. Publications (and
 * optionally contacts) are synced with Drupal afterwards. Progress is
 * printed to standard output. A dbxref that is not of the form
 * "DB:accession" loads nothing, but the transaction and syncs still run.
 *
 * @param $pub_dbxref
 *   Cross reference in the form "DB:accession".
 * @param $do_contact
 *   Passed on to tripal_pub_add_publications(); when TRUE contacts are also
 *   synced with Drupal at the end.
 */
function tripal_pub_import_by_dbxref($pub_dbxref, $do_contact = FALSE) {
  $batch_size = 1;
  $pager = 0;
  $first_page = 0;

  $persistent = tripal_db_persistent_chado();
  if (!$persistent) {
    print "A persistant connection was not obtained. Loading will be slow\n";
  }

  tripal_db_start_transaction();
  if ($persistent) {
    $notice  = "\nNOTE: Loading of the publication is performed using a database transaction. \n";
    $notice .= "If the load fails or is terminated prematurely then the entire set of \n";
    $notice .= "insertions/updates is rolled back and will not be found in the database\n\n";
    print $notice;
  }

  $parts = array();
  if (preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $parts)) {
    $source_db = $parts[1];
    $record_id = $parts[2];

    // A single "id"-scoped criterion asks the remote database for exactly
    // this record.
    $id_criterion = array(
      'search_terms' => "$source_db:$record_id",
      'scope' => 'id',
      'operation' => '',
      'is_phrase' => 0,
    );
    $query = array(
      'num_criteria' => 1,
      'remote_db' => $source_db,
      'criteria' => array('1' => $id_criterion),
    );

    $found = tripal_pub_get_remote_search_results($query['remote_db'], $query, $batch_size, $pager, $first_page);
    tripal_pub_add_publications($found, $do_contact);
  }

  tripal_db_commit_transaction();
  print "Transaction Complete\n";

  print "Syncing publications with Drupal...\n";
  tripal_pub_sync_pubs();

  if ($do_contact) {
    print "Syncing contacts with Drupal...\n";
    tripal_contact_sync_contacts();
  }

  print "Done.\n";
}
/**
 * Adds (or updates) a list of publications and reports what happened.
 *
 * Each publication is handed to tripal_pub_add_publication(); when that
 * yields a pub_id and the publication carries a 'Publication Dbxref', the
 * cross reference is linked as well. A progress line is printed per record.
 *
 * @param $pubs
 *   Array of publication detail arrays. Anything that is not an array is
 *   treated as an empty list.
 * @param $do_contact
 *   Passed on to tripal_pub_add_publication().
 * @param $update
 *   Passed on to tripal_pub_add_publication() as its update flag.
 *
 * @return
 *   An array with the keys 'error' (count of failed records) and
 *   'inserted', 'skipped', 'updated' (lists of the publication arrays,
 *   each extended with 'pub_id' when one was obtained).
 */
function tripal_pub_add_publications($pubs, $do_contact, $update = FALSE) {
  $report = array(
    'error' => 0,
    'inserted' => array(),
    'skipped' => array(),
    // Updates used to be dropped from the report; callers that only read
    // the other keys are unaffected by this extra list.
    'updated' => array(),
  );

  // A failed remote search may hand over something that is not a list.
  if (!is_array($pubs)) {
    $pubs = array();
  }
  $total_pubs = count($pubs);

  $i = 1;
  foreach ($pubs as $pub) {
    $memory = number_format(memory_get_usage()) . " bytes";
    print "Processing $i of $total_pubs. Memory usage: $memory.\r";

    // $action is filled in by reference with the outcome for this record.
    $action = '';
    $pub_id = tripal_pub_add_publication($pub, $action, $do_contact, $update);
    if ($pub_id) {
      if (!empty($pub['Publication Dbxref'])) {
        tripal_pub_add_pub_dbxref($pub_id, $pub['Publication Dbxref']);
      }
      $pub['pub_id'] = $pub_id;
    }

    switch ($action) {
      case 'error':
        $report['error']++;
        break;
      case 'inserted':
        $report['inserted'][] = $pub;
        break;
      case 'skipped':
        $report['skipped'][] = $pub;
        break;
      case 'updated':
        $report['updated'][] = $pub;
        break;
    }
    $i++;
  }
  print "\n";
  return $report;
}
/**
 * Links a publication to a database cross reference.
 *
 * If the link already exists it is returned unchanged. Otherwise the
 * database and the dbxref are created as needed and a pub_dbxref record is
 * inserted.
 *
 * @param $pub_id
 *   The pub_id of the publication.
 * @param $pub_dbxref
 *   Cross reference in the form "DB:accession".
 *
 * @return
 *   The existing pub_dbxref record, the result of the insert, or FALSE when
 *   the dbxref is malformed or a record could not be created.
 */
function tripal_pub_add_pub_dbxref($pub_id, $pub_dbxref) {

  // Split the cross reference into database name and accession.
  if (!preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
    return FALSE;
  }
  $dbname = $matches[1];
  $accession = $matches[2];

  // Nothing to do when the publication is already linked to this dbxref.
  $values = array(
    'dbxref_id' => array(
      'accession' => $accession,
      'db_id' => array(
        'name' => $dbname,
      ),
    ),
    'pub_id' => $pub_id,
  );
  $options = array('statement_name' => 'sel_pubdbxref_dbpu');
  $results = tripal_core_chado_select('pub_dbxref', array('*'), $values, $options);
  if (is_array($results) && count($results) > 0) {
    return $results[0];
  }

  // Make sure the database exists.
  $db = tripal_db_add_db($dbname);
  if (!$db) {
    watchdog('tripal_pub', "Cannot add publication dbxref: %db:%accession.",
      array('%db' => $dbname, '%accession' => $accession), WATCHDOG_ERROR);
    return FALSE;
  }

  // Make sure the dbxref exists; the insert below resolves it by
  // accession and database name.
  $dbxvalues = array(
    'accession' => $accession,
    'db_id' => $db->db_id,
  );
  $dbxoptions = array('statement_name' => 'sel_dbxref_acdb');
  $results = tripal_core_chado_select('dbxref', array('dbxref_id'), $dbxvalues, $dbxoptions);
  if (!is_array($results) || count($results) == 0) {
    if (!tripal_db_add_dbxref($db->db_id, $accession)) {
      watchdog('tripal_pub', "Cannot add publication dbxref: %db:%accession.",
        array('%db' => $dbname, '%accession' => $accession), WATCHDOG_ERROR);
      return FALSE;
    }
  }

  // Link the publication and the dbxref.
  $options = array('statement_name' => 'ins_pubdbxref_dbpu');
  $results = tripal_core_chado_insert('pub_dbxref', $values, $options);
  if (!$results) {
    // The variables array and the severity are separate arguments; they
    // were previously joined with "." by mistake.
    watchdog('tripal_pub', "Cannot add publication dbxref: %db:%accession.",
      array('%db' => $dbname, '%accession' => $accession), WATCHDOG_ERROR);
    return FALSE;
  }
  return $results;
}
/**
 * Looks up the publications linked to a database cross reference.
 *
 * @param $pub_dbxref
 *   Cross reference in the form "DB:accession".
 *
 * @return
 *   An array of pub_id values; empty when the dbxref is malformed or no
 *   publication is linked to it.
 */
function tripal_pub_get_pubs_by_dbxref($pub_dbxref) {
  $pub_ids = array();

  $parts = array();
  if (!preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $parts)) {
    return $pub_ids;
  }

  // $parts[1] is the database name, $parts[2] the accession.
  $db_filter = array('name' => $parts[1]);
  $dbxref_filter = array(
    'accession' => $parts[2],
    'db_id' => $db_filter,
  );
  $rows = tripal_core_chado_select(
    'pub_dbxref',
    array('pub_id'),
    array('dbxref_id' => $dbxref_filter),
    array('statement_name' => 'sel_pubdbxref_db')
  );

  foreach ($rows as $row) {
    $pub_ids[] = $row->pub_id;
  }
  return $pub_ids;
}
/**
 * Looks up publications by title, publication type and, optionally, year.
 *
 * @param $title
 *   The publication title to match.
 * @param $type
 *   Name of the publication type term in the 'tripal_pub' vocabulary.
 * @param $pyear
 *   Optional publication year. When empty the year is not part of the match.
 *
 * @return
 *   An array of matching pub_id values; empty when nothing matches.
 */
function tripal_pub_get_pubs_by_title_type_pyear($title, $type, $pyear = '') {

  $return = array();

  $values = array(
    'title' => $title,
    'type_id' => array(
      'name' => $type,
      'cv_id' => array(
        'name' => 'tripal_pub'
      )
    )
  );

  // The statement name must reflect the set of columns being matched, so
  // it gets a different suffix when the year takes part. The condition
  // used to test an undefined variable, so the year was never applied.
  $stmnt_suffix = 'tity';
  if ($pyear) {
    $values['pyear'] = $pyear;
    $stmnt_suffix .= 'py';
  }
  $options = array('statement_name' => 'sel_pub_' . $stmnt_suffix);
  $results = tripal_core_chado_select('pub', array('pub_id'), $values, $options);

  // A failed select may not return a list; treat that as "no matches".
  if (is_array($results)) {
    foreach ($results as $index => $pub) {
      $return[] = $pub->pub_id;
    }
  }
  return $return;
}
/**
 * Adds a publication to Chado, or updates it when it already exists.
 *
 * An existing record is searched for first by 'Publication Dbxref', then by
 * title, type and year. All keys of $pub_details other than 'raw', the
 * author list and the fields stored on the pub record itself are added as
 * publication properties in the 'tripal_pub' vocabulary.
 *
 * @param $pub_details
 *   Associative array describing the publication. Keys read here include
 *   'Publication Dbxref', 'Title', 'Publication Type' (string or list),
 *   'Year', 'Volume', 'Journal Name', 'Issue', 'Pages', 'Citation' and
 *   'Author List'.
 * @param $action
 *   Set by reference to 'inserted', 'updated', 'skipped' or 'error'.
 * @param $do_contact
 *   Passed on to tripal_pub_add_authors().
 * @param $update_if_exists
 *   When TRUE an existing publication is updated and its properties are
 *   rebuilt; when FALSE an existing publication is skipped.
 *
 * @return
 *   The pub_id of the publication, or FALSE on error.
 */
function tripal_pub_add_publication($pub_details, &$action, $do_contact = FALSE, $update_if_exists = FALSE) {

  $pub_id = 0;

  // Optional keys are read once here so a missing key never triggers an
  // undefined-index notice further down.
  $pub_dbxref = isset($pub_details['Publication Dbxref']) ? $pub_details['Publication Dbxref'] : '';
  $title      = isset($pub_details['Title']) ? $pub_details['Title'] : '';
  $year       = isset($pub_details['Year']) ? $pub_details['Year'] : '';
  $type       = isset($pub_details['Publication Type']) ? $pub_details['Publication Type'] : NULL;

  // First try to find the publication by its database cross reference.
  if ($pub_dbxref) {
    $results = tripal_pub_get_pubs_by_dbxref($pub_dbxref);
    if (count($results) == 1) {
      $pub_id = $results[0];
    }
    elseif (count($results) > 1) {
      // The message placeholders need the two halves of the dbxref; they
      // were previously filled from variables that were never set.
      $dbname = '';
      $accession = $pub_dbxref;
      if (preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
        $dbname = $matches[1];
        $accession = $matches[2];
      }
      watchdog('tripal_pub', "There are two publications with this accession: %db:%accession. Cannot determine which to update.",
        array('%db' => $dbname, '%accession' => $accession), WATCHDOG_ERROR);
      $action = 'error';
      return FALSE;
    }
  }

  // Otherwise fall back to matching on title, type and year.
  if (!$pub_id and $title) {
    $results = tripal_pub_get_pubs_by_title_type_pyear($title, $type, $year);
    if (count($results) == 1) {
      $pub_id = $results[0];
    }
    elseif (count($results) > 1) {
      watchdog('tripal_pub', "The publication with the same title, type and year is present multiple times. Cannot ".
        "determine which to use.  Title: '%title'. Type: '%type'. Year: '%year'",
        array('%title' => $title, '%type' => $type, '%year' => $year), WATCHDOG_ERROR);
      $action = 'error';
      return FALSE;
    }
  }

  // An existing publication is left alone unless an update was requested.
  if ($pub_id and !$update_if_exists) {
    $action = 'skipped';
    return $pub_id;
  }

  // Resolve the publication type term. When several types are given the
  // first one is used for the pub record.
  if (is_array($type)) {
    $type_name = isset($type[0]) ? $type[0] : '';
  }
  elseif ($type) {
    $type_name = $type;
  }
  else {
    watchdog('tripal_pub', "The Publication Type is a required property but is missing", array(), WATCHDOG_ERROR);
    $action = 'error';
    return FALSE;
  }
  $pub_type = tripal_cv_get_cvterm_by_name($type_name, NULL, 'tripal_pub');
  if (!$pub_type) {
    // Report the name that was actually looked up (indexing a string type
    // with [0] used to log only its first character).
    watchdog('tripal_pub', "Cannot find publication type: '%type'",
      array('%type' => $type_name), WATCHDOG_ERROR);
    $action = 'error';
    return FALSE;
  }

  // Columns of the pub record and the detail keys that feed them.
  $column_keys = array(
    'title'       => 'Title',
    'volume'      => 'Volume',
    'series_name' => 'Journal Name',
    'issue'       => 'Issue',
    'pyear'       => 'Year',
    'pages'       => 'Pages',
    'uniquename'  => 'Citation',
  );
  $values = array();
  foreach ($column_keys as $column => $detail_key) {
    $values[$column] = isset($pub_details[$detail_key]) ? $pub_details[$detail_key] : NULL;
  }
  $values['type_id'] = $pub_type->cvterm_id;

  if (!$pub_id) {
    // The publication is new: insert it.
    $options = array('statement_name' => 'ins_pub_tivoseispypaunty');
    $pub = tripal_core_chado_insert('pub', $values, $options);
    if (!$pub) {
      watchdog('tripal_pub', "Cannot insert the publication with title: %title",
        array('%title' => $title), WATCHDOG_ERROR);
      $action = 'error';
      return FALSE;
    }
    $pub_id = $pub['pub_id'];
    $action = 'inserted';
  }
  else {
    // The publication exists and an update was requested (the skip case
    // returned above). A freshly inserted record no longer falls through
    // to this branch, so it is reported as 'inserted', not 'updated'.
    $match = array('pub_id' => $pub_id);
    $options = array('statement_name' => 'up_pub_tivoseispypaunty');
    $success = tripal_core_chado_update('pub', $match, $values, $options);
    if (!$success) {
      watchdog('tripal_pub', "Cannot update the publication with title: %title",
        array('%title' => $title), WATCHDOG_ERROR);
      $action = 'error';
      return FALSE;
    }
    $action = 'updated';

    // Drop the old properties so they can be rebuilt below; the
    // 'Publication Dbxref' property is kept.
    $sql = "
      DELETE FROM {pubprop}
      WHERE
        pub_id = %d AND
        NOT type_id in (
          SELECT cvterm_id
          FROM {cvterm}
          WHERE name = 'Publication Dbxref'
        )
    ";
    chado_query($sql, $pub_id);
  }

  // Add the remaining details as properties of the publication.
  foreach ($pub_details as $key => $value) {

    // The raw remote record is never stored.
    if ($key == 'raw') {
      continue;
    }

    // The key must be a known term, either by name or by synonym.
    $cvterm = tripal_cv_get_cvterm_by_name($key, NULL, 'tripal_pub');
    if (!$cvterm) {
      $cvterm = tripal_cv_get_cvterm_by_synonym($key, NULL, 'tripal_pub');
    }
    if (!$cvterm) {
      watchdog('tripal_pub', "Cannot find term: '%prop'. Skipping.", array('%prop' => $key), WATCHDOG_ERROR);
      continue;
    }

    // Authors go into their own table.
    if ($key == 'Author List') {
      tripal_pub_add_authors($pub_id, $value, $do_contact);
      continue;
    }
    // These are already stored on the pub record itself.
    if ($key == 'Title' or $key == 'Volume' or $key == 'Journal Name' or $key == 'Issue' or
        $key == 'Year' or $key == 'Pages') {
      continue;
    }

    $success = 0;
    if (is_array($value)) {
      foreach ($value as $subkey => $subvalue) {
        // Numeric sub-keys are repeated values of this property; named
        // sub-keys are properties of their own.
        if (is_int($subkey)) {
          $success = tripal_core_insert_property('pub', $pub_id, $key, 'tripal_pub', $subvalue, FALSE);
        }
        else {
          $success = tripal_core_insert_property('pub', $pub_id, $subkey, 'tripal_pub', $subvalue, FALSE);
        }
      }
    }
    else {
      $success = tripal_core_insert_property('pub', $pub_id, $key, 'tripal_pub', $value, TRUE);
    }
    if (!$success) {
      watchdog('tripal_pub', "Cannot add property '%prop' to publication. Skipping.",
        array('%prop' => $key), WATCHDOG_ERROR);
    }
  }

  return $pub_id;
}
/**
 * Replaces the author list of a publication.
 *
 * All existing pubauthor records of the publication are deleted and one
 * record is inserted per valid author, ranked in list order. Optionally a
 * contact is created per author and linked to the pubauthor record.
 *
 * @param $pub_id
 *   The pub_id of the publication.
 * @param $authors
 *   List of author arrays. Keys read here: 'valid' (an author marked 'N' is
 *   skipped), 'Given Name', 'Surname', 'Suffix' and 'Collective'. All of
 *   them are optional.
 * @param $do_contact
 *   When TRUE a contact is added for each author via
 *   tripal_contact_add_contact() and linked through pubauthor_contact.
 */
function tripal_pub_add_authors($pub_id, $authors, $do_contact) {
  $rank = 0;

  // Start from a clean slate: remove the authors currently on record.
  $sql = "DELETE FROM {pubauthor} WHERE pub_id = %d";
  chado_query($sql, $pub_id);

  foreach ($authors as $author) {

    // Authors flagged as invalid are not stored.
    if (isset($author['valid']) && $author['valid'] == 'N') {
      continue;
    }
    // The flag is not an author property; keep it out of the contact.
    unset($author['valid']);

    // A person has no 'Collective' and a collective has no name parts, so
    // every field is optional; absent fields stay NULL as before.
    $given_name = isset($author['Given Name']) ? $author['Given Name'] : NULL;
    $surname    = isset($author['Surname']) ? $author['Surname'] : NULL;
    $suffix     = isset($author['Suffix']) ? $author['Suffix'] : NULL;
    $collective = isset($author['Collective']) ? $author['Collective'] : NULL;

    // Build the display name used for the contact.
    $name = '';
    $type = 'Person';
    if ($given_name) {
      $name .= $given_name;
    }
    if ($surname) {
      $name .= ' ' . $surname;
    }
    if ($suffix) {
      $name .= ' ' . $suffix;
    }
    if ($collective) {
      $name = $collective;
      $type = 'Collective';
    }
    $name = trim($name);

    // A collective has no surname, so its name is stored in that column.
    $values = array(
      'pub_id' => $pub_id,
      'rank' => $rank,
      'surname' => $surname ? $surname : $collective,
      'givennames' => $given_name,
      'suffix' => $suffix,
    );
    $options = array('statement_name' => 'ins_pubauthor_idrasugisu');
    $pubauthor = tripal_core_chado_insert('pubauthor', $values, $options);
    if (!$pubauthor) {
      watchdog('tripal_pub', "Cannot add author '%name' to publication.",
        array('%name' => $name), WATCHDOG_ERROR);
    }

    if ($do_contact) {
      // The remaining author fields are handed over as contact properties.
      $contact = tripal_contact_add_contact($name, '', $type, $author);

      // Link the author record to the contact when both exist.
      if ($contact and $pubauthor) {
        $values = array(
          'pubauthor_id' => $pubauthor['pubauthor_id'],
          'contact_id' => $contact['contact_id'],
        );
        $options = array('statement_name' => 'ins_pubauthorcontact_puco');
        $pubauthor_contact = tripal_core_chado_insert('pubauthor_contact', $values, $options);
        if (!$pubauthor_contact) {
          watchdog('tripal_pub', "Cannot link pub authro and contact.", array(), WATCHDOG_ERROR);
        }
      }
    }
    $rank++;
  }
}
/**
 * Retrieves a property of a publication.
 *
 * Thin wrapper around tripal_core_get_property() for the 'pub' table.
 *
 * @param $pub_id
 *   The pub_id of the publication.
 * @param $property
 *   Name of the property term.
 *
 * @return
 *   Whatever tripal_core_get_property() returns.
 */
function tripal_pub_get_property($pub_id, $property) {
  // Publication properties are written with the 'tripal_pub' vocabulary
  // throughout this file, so they must be looked up in that vocabulary too
  // (this previously named 'tripal').
  return tripal_core_get_property('pub', $pub_id, $property, 'tripal_pub');
}
/**
 * Adds a property to a publication.
 *
 * Thin wrapper around tripal_core_insert_property() for the 'pub' table and
 * the 'tripal_pub' vocabulary.
 *
 * @param $pub_id
 *   The pub_id of the publication.
 * @param $property
 *   Name of the property term.
 * @param $value
 *   Value to store.
 * @param $update_if_present
 *   Passed through to tripal_core_insert_property().
 *
 * @return
 *   Whatever tripal_core_insert_property() returns.
 */
function tripal_pub_insert_property($pub_id, $property, $value, $update_if_present = 0) {
  $base_table = 'pub';
  $cv_name = 'tripal_pub';
  $outcome = tripal_core_insert_property($base_table, $pub_id, $property, $cv_name, $value, $update_if_present);
  return $outcome;
}
/**
 * Updates a property of a publication.
 *
 * Thin wrapper around tripal_core_update_property() for the 'pub' table and
 * the 'tripal_pub' vocabulary.
 *
 * @param $pub_id
 *   The pub_id of the publication.
 * @param $property
 *   Name of the property term.
 * @param $value
 *   New value of the property.
 * @param $insert_if_missing
 *   Passed through to tripal_core_update_property().
 *
 * @return
 *   Whatever tripal_core_update_property() returns.
 */
function tripal_pub_update_property($pub_id, $property, $value, $insert_if_missing = 0) {
  $base_table = 'pub';
  $cv_name = 'tripal_pub';
  $outcome = tripal_core_update_property($base_table, $pub_id, $property, $cv_name, $value, $insert_if_missing);
  return $outcome;
}
/**
 * Deletes a property of a publication.
 *
 * Thin wrapper around tripal_core_delete_property() for the 'pub' table.
 *
 * @param $pub_id
 *   The pub_id of the publication.
 * @param $property
 *   Name of the property term.
 *
 * @return
 *   Whatever tripal_core_delete_property() returns.
 */
function tripal_pub_delete_property($pub_id, $property) {
  // Publication properties are written with the 'tripal_pub' vocabulary
  // throughout this file, so they must be deleted from that vocabulary too
  // (this previously named 'tripal').
  return tripal_core_delete_property('pub', $pub_id, $property, 'tripal_pub');
}
/**
 * Builds a citation string from the details of a publication.
 *
 * The result has the shape
 * "Authors. Title. Journal. Date; Volume(Issue):Pages." where the journal
 * falls back to its abbreviation and empty parts are left out.
 *
 * @param $pub
 *   Publication details. Keys read: 'Authors', 'Title', 'Journal Name',
 *   'Journal Abbreviation', 'Publication Date', 'Volume', 'Issue', 'Pages'.
 *
 * @return
 *   The citation string.
 */
function tripal_pub_create_citation($pub) {
  $pieces = array($pub['Authors'], '. ', $pub['Title'], '. ');

  // Prefer the full journal name; use the abbreviation only without it.
  if ($pub['Journal Name']) {
    $pieces[] = $pub['Journal Name'] . '. ';
  }
  elseif ($pub['Journal Abbreviation']) {
    $pieces[] = $pub['Journal Abbreviation'] . '. ';
  }

  $pieces[] = $pub['Publication Date'];

  $volume = $pub['Volume'];
  $issue = $pub['Issue'];
  $pages = $pub['Pages'];

  // The separator after the date appears only when a locator follows.
  if ($volume || $issue || $pages) {
    $pieces[] = '; ';
  }
  if ($volume) {
    $pieces[] = $volume;
  }
  if ($issue) {
    $pieces[] = '(' . $issue . ')';
  }
  if ($pages) {
    // The colon separates pages from a volume, so it needs a volume.
    $pieces[] = $volume ? ':' . $pages : $pages;
  }
  $pieces[] = '.';

  return implode('', $pieces);
}
|