tripal_pub.api.inc 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867
  1. <?php
  2. /**
  3. * @file
  4. * Provides an application programming interface (API) to manage chado publications
  5. */
  6. /**
  7. * @defgroup tripal_pub_api Publication Module API
  8. * @ingroup tripal_api
  9. * @{
  10. * Provides an application programming interface (API) to manage chado publications
  11. *
  12. * @stephen add documentation here for how to add a new importer.
  13. *
  14. * @}
  15. */
  /**
   * Searches a remote publication database for publications.
   *
   * The search itself is delegated to a callback function whose name is
   * 'tripal_pub_remote_search_' followed by the value of $remote_db. If no
   * function with that name exists an empty result structure is returned.
   *
   * @param $remote_db
   *   The name of the remote publication database to query. These names should
   *   match the name of the databases in the Chado 'db' table (e.g. 'PMID' for
   *   PubMed).
   * @param $search_array
   *   An associative array containing the search criteria. The following keys
   *   are expected:
   *   - 'remote_db': the name of the remote publication database.
   *   - 'num_criteria': the number of criteria present in the search array.
   *   - 'days': the number of days to include in the search starting from
   *     today.
   *   - 'criteria': an associative array containing the search criteria. There
   *     should be no less than 'num_criteria' elements in this array. Each
   *     element is expected to have these keys:
   *     - 'search_terms': a list of terms to search on, separated by spaces.
   *     - 'scope': the fields to search in the remote database. Valid values
   *       include 'title', 'abstract', 'author' and 'any'.
   *     - 'operation': the logical operation to use for this criteria. Valid
   *       values include 'AND', 'OR' and 'NOT'.
   * @param $num_to_retrieve
   *   The number of records to retrieve. In cases with large numbers of
   *   records to retrieve, the remote database may limit the size of each
   *   retrieval.
   * @param $page
   *   Optional. If this function is called where the page for the pager cannot
   *   be set using the $_GET variable, use this argument to specify the page
   *   to retrieve.
   *
   * @return
   *   Whatever the callback returns. When no callback exists, an array with
   *   the keys 'total_records' (0), 'search_str' (an empty string) and 'pubs'
   *   (an empty array).
   *
   * @ingroup tripal_pub_api
   */
  function tripal_get_remote_pubs($remote_db, $search_array, $num_to_retrieve, $page = 0) {
    // Each supported remote database provides its own search function.
    $handler = 'tripal_pub_remote_search_' . $remote_db;

    // No handler for this database: hand back an empty result set.
    if (!function_exists($handler)) {
      return array(
        'total_records' => 0,
        'search_str' => '',
        'pubs' => array(),
      );
    }

    return call_user_func($handler, $search_array, $num_to_retrieve, $page);
  }
  /**
   * Retrieves a single publication from a supported remote database.
   *
   * The record is looked up by running an 'id'-scoped search with a single
   * criterion through tripal_get_remote_pubs().
   *
   * @param $dbxref
   *   The unique database ID for the record to retrieve. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database.
   *
   * @return
   *   The first publication array returned by the remote search, or FALSE if
   *   $dbxref is not of the form DB_NAME:ACCESSION, if DB_NAME is not listed
   *   in the 'tripal_pub_supported_dbs' variable, or if the search returned no
   *   publication.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_get_remote_pub($dbxref) {
    $matches = array();
    if (!preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
      return FALSE;
    }
    $remote_db = $matches[1];
    $accession = $matches[2];

    // Check that the database is supported.
    $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
    if (!in_array($remote_db, $supported_dbs)) {
      return FALSE;
    }

    $search = array(
      'num_criteria' => 1,
      'remote_db' => $remote_db,
      'criteria' => array(
        '1' => array(
          'search_terms' => "$remote_db:$accession",
          'scope' => 'id',
          'operation' => '',
          'is_phrase' => 0,
        ),
      ),
    );
    $results = tripal_get_remote_pubs($remote_db, $search, 1, 0);

    // The remote search may legitimately find nothing; report that as FALSE
    // rather than reading a non-existent array element.
    if (!isset($results['pubs'][0])) {
      return FALSE;
    }
    return $results['pubs'][0];
  }
  /**
   * Searches Chado for publications that match the user supplied criteria.
   *
   * Builds the SQL statement from the criteria and executes it. Typically,
   * this function is called by the search form generated by the
   * tripal_pub_search_form() function but it is included in the API for
   * calling by anyone.
   *
   * @param $search_array
   *   An array of search criteria provided by the user. The search array is
   *   an associative array with the following keys:
   *   - 'num_criteria': an integer indicating the number of search criteria
   *     supplied.
   *   - 'from_year': filters records by a start year.
   *   - 'to_year': filters records by an end year. The year filter is only
   *     applied when both 'from_year' and 'to_year' are non-empty.
   *   - 'criteria': an array of criteria indexed from 1 to 'num_criteria'.
   *     Each criteria is an associative array with the following keys:
   *     - 'search_terms': the text used for searching. Criteria with an empty
   *       value are skipped.
   *     - 'scope': the cvterm_id of the property used for filtering, or 0 to
   *       search in any field.
   *     - 'operation': how the criteria is combined with the preceding ones:
   *       'AND', 'OR' or 'NOT'. Any other value is treated as 'AND'.
   * @param $offset
   *   The offset for paging records. The first record returned will be
   *   at the offset indicated here, and the next $limit number of records
   *   will be returned.
   * @param $limit
   *   The number of records to retrieve.
   * @param $total_records
   *   A value passed by reference. This value will get set to the total
   *   number of matching records.
   *
   * @return
   *   The object returned by chado_query() for the paged query.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_search_publications($search_array, $offset, $limit, &$total_records) {
    // Build the SQL based on the criteria provided by the user.
    $select = "SELECT DISTINCT P.*, CP.nid ";
    $from = "FROM {pub} P
      LEFT JOIN public.chado_pub CP on P.pub_id = CP.pub_id
      INNER JOIN {cvterm} CVT on CVT.cvterm_id = P.type_id
    ";
    // Always exclude the dummy pub.
    $where = "WHERE (NOT P.title = 'null') ";
    $order = "ORDER BY P.pyear DESC, P.title ASC";
    // Arguments for the placeholders used in the FROM and WHERE clauses.
    $args = array();

    $num_criteria = isset($search_array['num_criteria']) ? (int) $search_array['num_criteria'] : 0;
    $from_year = isset($search_array['from_year']) ? $search_array['from_year'] : NULL;
    $to_year = isset($search_array['to_year']) ? $search_array['to_year'] : NULL;

    for ($i = 1; $i <= $num_criteria; $i++) {
      if (!isset($search_array['criteria'][$i])) {
        continue;
      }
      $criterion = $search_array['criteria'][$i];
      $value = isset($criterion['search_terms']) ? $criterion['search_terms'] : '';
      $type_id = isset($criterion['scope']) ? $criterion['scope'] : 0;
      $op = isset($criterion['operation']) ? $criterion['operation'] : '';

      // Skip criteria with no values.
      if (!$value) {
        continue;
      }

      // The operator is concatenated into the SQL, so to prevent SQL injection
      // only the values we expect are ever used.
      if ($op === 'NOT') {
        $op = 'AND NOT';
      }
      elseif ($op !== 'OR') {
        $op = 'AND';
      }

      // Get the name of the scope type.
      $cvterm = chado_select_record('cvterm', array('name'), array('cvterm_id' => $type_id));
      $type_name = '';
      if (is_array($cvterm) and count($cvterm) > 0) {
        $type_name = $cvterm[0]->name;
      }

      if ($type_name == 'Title') {
        $where .= " $op (lower(P.title) LIKE lower(:crit$i)) ";
        $args[":crit$i"] = '%' . $value . '%';
      }
      // Year, volume and issue are compared for equality, so the value is
      // bound as-is: LIKE wildcards would prevent any match.
      elseif ($type_name == 'Year') {
        $where .= " $op (lower(P.pyear) = lower(:crit$i)) ";
        $args[":crit$i"] = $value;
      }
      elseif ($type_name == 'Volume') {
        $where .= " $op (lower(P.volume) = lower(:crit$i)) ";
        $args[":crit$i"] = $value;
      }
      elseif ($type_name == 'Issue') {
        $where .= " $op (lower(P.issue) = lower(:crit$i)) ";
        $args[":crit$i"] = $value;
      }
      // Journal and conference names may be stored either in pub.series_name
      // or as a property. The property type and the search text need separate
      // placeholders.
      elseif ($type_name == 'Journal Name') {
        $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id AND PP$i.type_id = :type_id$i ";
        $where .= " $op ((lower(P.series_name) = lower(:crit$i) and CVT.name = 'Journal Article') OR
          (lower(PP$i.value) = lower(:crit$i))) ";
        $args[":crit$i"] = $value;
        $args[":type_id$i"] = $type_id;
      }
      elseif ($type_name == 'Conference Name') {
        $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id AND PP$i.type_id = :type_id$i ";
        $where .= " $op ((lower(P.series_name) = lower(:crit$i) and CVT.name = 'Conference Proceedings') OR
          (lower(PP$i.value) = lower(:crit$i))) ";
        $args[":crit$i"] = $value;
        $args[":type_id$i"] = $type_id;
      }
      elseif ($type_name == 'Publication Type') {
        $where .= " $op (lower(CVT.name) = lower(:crit$i))";
        $args[":crit$i"] = $value;
      }
      // A scope of 0 means 'Any Field'.
      elseif ($type_id == 0) {
        $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id ";
        $where .= " $op (lower(PP$i.value) LIKE lower(:crit$i) OR
          lower(P.title) LIKE lower(:crit$i) OR
          lower(P.volumetitle) LIKE lower(:crit$i) OR
          lower(P.publisher) LIKE lower(:crit$i) OR
          lower(P.uniquename) LIKE lower(:crit$i) OR
          lower(P.pubplace) LIKE lower(:crit$i) OR
          lower(P.miniref) LIKE lower(:crit$i) OR
          lower(P.series_name) LIKE lower(:crit$i)) ";
        $args[":crit$i"] = '%' . $value . '%';
      }
      // For all other properties.
      else {
        $from .= " LEFT JOIN {pubprop} PP$i ON PP$i.pub_id = P.pub_id AND PP$i.type_id = :type_id$i ";
        $where .= " $op (lower(PP$i.value) LIKE lower(:crit$i)) ";
        $args[":crit$i"] = '%' . $value . '%';
        $args[":type_id$i"] = $type_id;
      }
    }

    // The year range is only applied when both bounds were supplied.
    if ($from_year and $to_year) {
      $where .= " AND (P.pyear ~ '....' AND to_number(P.pyear,'9999') >= :from_year AND to_number(P.pyear,'9999') <= :to_year) ";
      $args[":from_year"] = $from_year;
      $args[":to_year"] = $to_year;
    }

    $sql = "$select $from $where $order LIMIT " . (int) $limit . ' OFFSET ' . (int) $offset;
    // Ordering has no effect on a count, so it is left out of the count query.
    $count = "SELECT count(*) FROM ($select $from $where) as t1";

    // First get the total number of matches, then the requested page.
    $total_records = chado_query($count, $args)->fetchField();
    $results = chado_query($sql, $args);
    return $results;
  }
  /**
   * Retrieves a chado publication variable.
   *
   * @param $identifiers
   *   An array used to uniquely identify a publication. This array has the same
   *   format as that used by the chado_generate_var(). The following keys can be
   *   useful for uniquely identifying a publication as they should be unique:
   *   - pub_id: the chado pub.pub_id primary key
   *   - nid: the drupal nid of the publication
   *   - uniquename: A value to match with the pub.uniquename field
   *   There are also some specially handled keys. They are:
   *   - property: An array describing the property to select records for. It
   *     should at least have either a 'type_name' key (if unique across cvs) or
   *     'type_id' key. Other supported keys include: 'cv_id', 'cv_name' (of the
   *     type), 'value' and 'rank'
   *   - dbxref: The database cross reference accession. It should be in the
   *     form DB:ACCESSION, where DB is the database name and ACCESSION is the
   *     unique publication identifier (e.g. PMID:4382934)
   *   - dbxref_id: The dbxref.dbxref_id of the publication.
   *   The special keys are checked in the order 'property', 'dbxref',
   *   'dbxref_id'; only the first one present is used.
   * @param $options
   *   An array of options. Supported keys include:
   *   - Any keys supported by chado_generate_var(). See that function
   *     definition for additional details.
   *
   * NOTE: the $identifiers parameter can really be any array similar to $values
   * passed into chado_select_record(). It should fully specify the pub record
   * to be returned.
   *
   * @return
   *   If a single publication is retrieved using the identifiers, then the
   *   publication is returned in the same format returned by the
   *   chado_generate_var() function. Otherwise, FALSE will be returned.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_get_publication($identifiers, $options = array()) {
    // Error checking of parameters. Nothing can be looked up without a
    // non-empty array, so stop right after reporting the problem.
    if (!is_array($identifiers)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "tripal_get_publication: The identifier passed in is expected to be an array with the key
          matching a column name in the pub table (ie: pub_id or name). You passed in %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }
    if (empty($identifiers)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "tripal_get_publication: You did not pass in anything to identify the publication you want. The identifier
          is expected to be an array with the key matching a column name in the pub table
          (ie: pub_id or name). You passed in %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // If one of the identifiers is property then use
    // chado_get_record_with_property().
    if (array_key_exists('property', $identifiers)) {
      $property = $identifiers['property'];
      unset($identifiers['property']);
      $pub = chado_get_record_with_property('pub', $property, $identifiers, $options);
    }
    elseif (array_key_exists('dbxref', $identifiers)) {
      $matches = array();
      if (!preg_match('/^(.*?):(.*?)$/', $identifiers['dbxref'], $matches)) {
        tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
          "tripal_get_publication: The dbxref identifier is not correctly formatted. Identifiers passed: %identifier.",
          array('%identifier' => print_r($identifiers, TRUE))
        );
        return FALSE;
      }
      $values = array(
        'dbxref_id' => array(
          'accession' => $matches[2],
          'db_id' => array(
            'name' => $matches[1],
          ),
        ),
      );
      $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), $values);
      if (!is_array($pub_dbxref) or count($pub_dbxref) == 0) {
        return FALSE;
      }
      $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
    }
    elseif (array_key_exists('dbxref_id', $identifiers)) {
      // First get the pub_dbxref record, then the pub it points to.
      $values = array('dbxref_id' => $identifiers['dbxref_id']);
      $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), $values);
      if (!is_array($pub_dbxref) or count($pub_dbxref) == 0) {
        return FALSE;
      }
      $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
    }
    // Else we have a simple case and we can just use chado_generate_var to get
    // the pub.
    else {
      $pub = chado_generate_var('pub', $identifiers, $options);
    }

    // Ensure the pub is singular. If it's an array then it is not singular.
    if (is_array($pub)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "tripal_get_publication: The identifiers did not find a single unique record. Identifiers passed: %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // Report an error if $pub is FALSE since then the lookup has failed.
    if ($pub === FALSE) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "tripal_get_publication: Could not find a publication using the identifiers
          provided. Check that the identifiers are correct. Identifiers passed: %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // As far as we know, everything is fine so give them their pub.
    return $pub;
  }
  /**
   * Looks for publications in Chado that match the given details.
   *
   * The publication table of Chado only has a unique constraint for the
   * uniquename of the publication, but in reality a publication can be
   * considered unique by a combination of the title, publication type,
   * published year and series name (e.g. journal name or conference name). The
   * site administrator can configure how publications are determined to be
   * unique (the 'tripal_pub_import_duplicate_check' variable). This function
   * uses that configuration to look for publications that match the details
   * specified by the $pub_details argument.
   *
   * The checks are made in this order and the first one that finds a
   * publication wins: the 'Publication Dbxref', then the 'Citation', then the
   * configured combination of fields.
   *
   * @param $pub_details
   *   An associative array with details about the publications. The expected
   *   keys are:
   *   - 'Title': The title of the publication
   *   - 'Year': The published year of the publication
   *   - 'Publication Type': The publication type, or an array of publication
   *     types (a publication can have more than one type) of which the first
   *     is used. This key is required unless the publication is found by its
   *     'Publication Dbxref' or 'Citation'.
   *   - 'Series Name': The series name of the publication
   *   - 'Journal Name': An alternative to 'Series Name'
   *   - 'Conference Name': An alternative to 'Series Name'
   *   - 'Citation': The publication citation (this is the value saved in the
   *     pub.uniquename field and must be unique). If this key is present it
   *     will also be checked
   *   - 'Publication Dbxref': A database cross reference of the form
   *     DB:ACCESSION where DB is the name of the database and ACCESSION is the
   *     unique identifier (e.g PMID:3483139)
   *
   * @return
   *   An array containing the pub_id's of matching publications. Returns an
   *   empty array if no pubs match, if the 'Publication Type' is missing or
   *   if the publication type cannot be found.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_publication_exists($pub_details) {
    // First try to find the publication using the accession number if that key
    // exists in the details array.
    if (array_key_exists('Publication Dbxref', $pub_details)) {
      $pub = tripal_get_publication(array('dbxref' => $pub_details['Publication Dbxref']));
      if ($pub) {
        return array($pub->pub_id);
      }
    }

    // Make sure the citation is unique.
    if (array_key_exists('Citation', $pub_details)) {
      $pub = tripal_get_publication(array('uniquename' => $pub_details['Citation']));
      if ($pub) {
        return array($pub->pub_id);
      }
    }

    // Get the publication type (use the first publication type).
    if (!array_key_exists('Publication Type', $pub_details)) {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_publication_exists(): The Publication Type is a " .
        "required property but is missing", array());
      return array();
    }
    $type_name = $pub_details['Publication Type'];
    if (is_array($type_name)) {
      $first_type = reset($type_name);
      $type_name = ($first_type === FALSE) ? '' : $first_type;
    }
    $identifiers = array(
      'name' => $type_name,
      'cv_id' => array(
        'name' => 'tripal_pub',
      ),
    );
    $pub_type = tripal_get_cvterm($identifiers);
    if (!$pub_type) {
      // Report the name that was actually looked up; the type may have been
      // supplied either as a string or as an array.
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_publication_exists(): Cannot find publication type: '%type'",
        array('%type' => $type_name));
      return array();
    }

    // Get the series name. The pub.series_name field is only 255 chars so we
    // must truncate to be safe. When several of the keys are present the last
    // one listed here takes precedence.
    $series_name = '';
    foreach (array('Series Name', 'Journal Name', 'Conference Name') as $series_key) {
      if (array_key_exists($series_key, $pub_details)) {
        $series_name = substr($pub_details[$series_key], 0, 255);
      }
    }

    $title = isset($pub_details['Title']) ? $pub_details['Title'] : NULL;
    $year = isset($pub_details['Year']) ? $pub_details['Year'] : NULL;

    // Make sure the publication is unique using the preferred import
    // duplication check.
    $import_dups_check = variable_get('tripal_pub_import_duplicate_check', 'title_year_media');
    $pubs = array();
    switch ($import_dups_check) {
      case 'title_year':
        $identifiers = array(
          'title' => $title,
          'pyear' => $year,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;

      case 'title_year_type':
        $identifiers = array(
          'title' => $title,
          'pyear' => $year,
          'type_id' => $pub_type->cvterm_id,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;

      case 'title_year_media':
        $identifiers = array(
          'title' => $title,
          'pyear' => $year,
          'series_name' => $series_name,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;
    }

    $return = array();
    // A failed select does not give us an array to iterate over.
    if (is_array($pubs)) {
      foreach ($pubs as $pub) {
        $return[] = $pub->pub_id;
      }
    }
    return $return;
  }
  /**
   * Updates publication records that currently exist in the Chado pub table
   * with the most recent data in the remote database.
   *
   * Every publication is looked up again in its remote database, one record
   * at a time, and passed to tripal_pub_add_publications() with the update
   * flag set. Publications whose database is not listed in the
   * 'tripal_pub_supported_dbs' variable are skipped. All work is performed
   * within a database transaction that is rolled back if an exception occurs.
   * Progress messages are printed.
   *
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado. Contacts are added using the name provided by the remote
   *   database.
   * @param $dbxref
   *   The unique database ID for the record to update. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database. A value that is not of this format is
   *   ignored.
   * @param $db
   *   The name of the remote database to update. If this value is provided and
   *   no dbxref then all of the publications currently in the Chado database
   *   for this remote database will be updated. If neither $dbxref nor $db is
   *   usable then the publications of all databases are selected.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_reimport_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {
    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      // Get a list of all publications by their dbxrefs.
      $sql = "
        SELECT DB.name as db_name, DBX.accession
        FROM {pub} P
          INNER JOIN {pub_dbxref} PDBX ON P.pub_id = PDBX.pub_id
          INNER JOIN {dbxref} DBX ON DBX.dbxref_id = PDBX.dbxref_id
          INNER JOIN {db} DB ON DB.db_id = DBX.db_id
      ";
      $args = array();
      $matches = array();
      if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
        $sql .= "WHERE DBX.accession = :accession and DB.name = :dbname ";
        $args[':accession'] = $matches[2];
        $args[':dbname'] = $matches[1];
      }
      elseif ($db) {
        $sql .= " WHERE DB.name = :dbname ";
        $args[':dbname'] = $db;
      }
      $sql .= "ORDER BY DB.name, P.pub_id";
      $results = chado_query($sql, $args);

      // We only update publications for databases we support. The list does
      // not change while we iterate, so it is fetched once.
      $supported_dbs = variable_get('tripal_pub_supported_dbs', array());

      // Iterate through the publications.
      while ($pub = $results->fetchObject()) {
        $accession = $pub->accession;
        $remote_db = $pub->db_name;
        if (!in_array($remote_db, $supported_dbs)) {
          continue;
        }

        $search = array(
          'num_criteria' => 1,
          'remote_db' => $remote_db,
          'criteria' => array(
            '1' => array(
              'search_terms' => "$remote_db:$accession",
              'scope' => 'id',
              'operation' => '',
              'is_phrase' => 0,
            ),
          ),
        );
        $remote = tripal_get_remote_pubs($remote_db, $search, 1, 0);

        // The remote search returns a structure with the publications in its
        // 'pubs' element; only those are handed on, as the importers do.
        $pubs = isset($remote['pubs']) ? $remote['pubs'] : array();
        tripal_pub_add_publications($pubs, $do_contact, TRUE);
      }

      // Sync the newly added publications with Drupal.
      print "Syncing publications with Drupal...\n";
      chado_node_sync_records('pub');

      // If the caller wants to create contacts then we should sync them.
      if ($do_contact) {
        print "Syncing contacts with Drupal...\n";
        chado_node_sync_records('contact');
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      // Make sure we start errors on a new line.
      print "\n";
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Imports all publications for a given publication import setup.
   *
   * The import setup is read from the tripal_pub_import table. Its criteria
   * are used to query the remote database one page of 100 records at a time
   * and every page is passed to tripal_pub_add_publications(). All work is
   * performed within a database transaction that is rolled back if an
   * exception occurs, which includes the case where the import setup cannot
   * be found or its criteria cannot be read. Progress messages are printed.
   *
   * @param $import_id
   *   The ID of the import setup to use
   * @param $job_id
   *   The jobs management job_id for the job if this function is run as a job.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_execute_pub_importer($import_id, $job_id = NULL) {
    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    // Start the transaction.
    $transaction = db_transaction();
    try {
      $page = 0;
      $num_to_retrieve = 100;

      // Get the import setup.
      $args = array(':import_id' => $import_id);
      $sql = "SELECT * FROM {tripal_pub_import} WHERE pub_import_id = :import_id ";
      $import = db_query($sql, $args)->fetchObject();
      if (!$import) {
        throw new Exception("Cannot find a publication importer with the ID '$import_id'.");
      }
      print "Executing Importer: '" . $import->name . "'\n";

      // NOTE(review): the criteria are unserialized from the site's own
      // tripal_pub_import table; confirm nothing untrusted can write to it.
      $criteria = unserialize($import->criteria);
      if (!is_array($criteria) or !isset($criteria['remote_db'])) {
        throw new Exception("The criteria of the publication importer with the ID '$import_id' cannot be read.");
      }
      $remote_db = $criteria['remote_db'];

      do {
        // Retrieve the pubs for this page. We'll retrieve 100 at a time.
        $results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
        $pubs = isset($results['pubs']) ? $results['pubs'] : array();
        tripal_pub_add_publications($pubs, $import->do_contact);
        $page++;
      }
      // Continue looping until we have a $pubs array that does not have
      // our requested number of records. This means we've hit the end.
      while (count($pubs) == $num_to_retrieve);

      // Sync the newly added publications with Drupal.
      print "Syncing publications with Drupal...\n";
      chado_node_sync_records('pub');

      // If the importer wanted to create contacts from the authors then sync
      // them.
      if ($import->do_contact) {
        print "Syncing contacts with Drupal...\n";
        chado_node_sync_records('contact');
      }

      // There is only a job to update when this function is run as a job.
      if ($job_id) {
        tripal_set_job_progress($job_id, '100');
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      // Make sure we start errors on a new line.
      print "\n";
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Imports all publications for all active import setups.
   *
   * Every record of the tripal_pub_import table that is not disabled is
   * executed in turn. The remote database is queried one page at a time and
   * each page of publications is handed to tripal_pub_add_publications(). The
   * whole run is wrapped in a single database transaction which is rolled
   * back if an exception is thrown.
   *
   * @param $report_email
   *   A list of email addresses, separated by commas, that should be notified
   *   once importing has completed. If FALSE (the default) no report is sent.
   * @param $do_update
   *   If set to TRUE then publications that already exist in the Chado database
   *   will be updated, whereas if FALSE only new publications will be added.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_execute_active_pub_importers($report_email = FALSE, $do_update = FALSE) {
    // number of publications requested from the remote database per page
    $num_to_retrieve = 100;

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    // start the transaction
    $transaction = db_transaction();
    try {
      // get all of the loaders
      $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
      $importers = db_query($sql, array());

      $do_contact = FALSE;
      $reports = array();
      foreach ($importers as $import) {
        $page = 0;
        print "Executing importer: '" . $import->name . "'\n";

        // keep track if any of the importers want to create contacts from authors
        if ($import->do_contact == 1) {
          $do_contact = TRUE;
        }

        // the search criteria are stored serialized in the importer record
        $criteria = unserialize($import->criteria);
        $remote_db = (is_array($criteria) && isset($criteria['remote_db'])) ? $criteria['remote_db'] : NULL;

        // Start with an empty report for this importer. The report of every
        // page is appended to it so that the final report covers all pages
        // rather than only the last one retrieved.
        $reports[$import->name] = array();
        do {
          // retrieve the pubs for this page. We'll retrieve 100 at a time
          $remote = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);

          // guard against a missing or malformed 'pubs' element so that the
          // count() in the loop condition always receives an array
          $pubs = array();
          if (is_array($remote) && isset($remote['pubs']) && is_array($remote['pubs'])) {
            $pubs = $remote['pubs'];
          }

          $page_report = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);
          if (is_array($page_report)) {
            foreach ($page_report as $status => $entries) {
              if (!isset($reports[$import->name][$status])) {
                $reports[$import->name][$status] = array();
              }
              $reports[$import->name][$status] = array_merge($reports[$import->name][$status], (array) $entries);
            }
          }
          $page++;
        }
        // continue looping until we have a $pubs array that does not have
        // our requested number of records. This means we've hit the end
        while (count($pubs) == $num_to_retrieve);
      }

      // sync the newly added publications with Drupal
      print "Syncing publications with Drupal...\n";
      chado_node_sync_records('pub');

      // iterate through each of the reports and generate a final report with HTML links
      if ($report_email) {
        global $base_url;

        $HTML_report = "<html>";
        foreach ($reports as $importer => $report) {
          $inserted = array();
          if (isset($report['inserted']) && is_array($report['inserted'])) {
            $inserted = $report['inserted'];
          }
          $total = count($inserted);

          // the importer name and the titles are escaped because they end up
          // in an HTML message; l() already escapes the titles it links
          $HTML_report .= "<b>$total new publications from importer: " . check_plain($importer) . "</b><br><ol>\n";
          foreach ($inserted as $pub) {
            if (array_key_exists('pub_id', $pub)) {
              $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
            }
            else {
              $item = check_plain($pub['Title']);
            }
            $HTML_report .= "<li>$item</li>\n";
          }
          $HTML_report .= "</ol>\n";
        }
        $HTML_report .= "</html>";

        $site_email = variable_get('site_mail', '');
        $params = array(
          'message' => $HTML_report
        );
        drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
      }

      // if any of the importers wanted to create contacts from the authors then sync them
      if ($do_contact) {
        print "Syncing contacts with Drupal...\n";
        chado_node_sync_records('contact');
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Imports a single publication specified by a remote database cross reference.
   *
   * The cross reference is split at its first colon into a database name and
   * an accession, the remote database is searched by that ID and the result is
   * handed to tripal_pub_add_publications(). The work is wrapped in a database
   * transaction which is rolled back if an exception is thrown.
   *
   * @param $pub_dbxref
   *   The unique database ID for the record to update. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database. If the value does not have this format
   *   an error is printed and nothing is imported.
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado.
   * @param $do_update
   *   If set to TRUE then the publication will be updated if it already exists
   *   in the database. Defaults to FALSE.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update = FALSE) {
    // only a single record is expected for a database cross reference
    $num_to_retrieve = 1;
    $page = 0;

    // Validate the cross reference before any work is done. Without this a
    // malformed value would skip the import but still report "Done."
    $matches = array();
    if (!preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
      print "ERROR: The publication cross reference must be of the format DB_NAME:ACCESSION.\n";
      return;
    }
    $dbname = $matches[1];
    $accession = $matches[2];

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      // build a one-criterion search that looks the record up by its ID
      $criteria = array(
        'num_criteria' => 1,
        'remote_db' => $dbname,
        'criteria' => array(
          '1' => array(
            'search_terms' => "$dbname:$accession",
            'scope' => 'id',
            'operation' => '',
            'is_phrase' => 0,
          ),
        ),
      );

      $remote = tripal_get_remote_pubs($dbname, $criteria, $num_to_retrieve, $page);
      $pubs = array();
      if (is_array($remote) && isset($remote['pubs']) && is_array($remote['pubs'])) {
        $pubs = $remote['pubs'];
      }
      tripal_pub_add_publications($pubs, $do_contact, $do_update);

      // sync the newly added publications with Drupal
      print "Syncing publications with Drupal...\n";
      chado_node_sync_records('pub');

      // if contacts were requested for the authors then sync them as well
      if ($do_contact) {
        print "Syncing contacts with Drupal...\n";
        chado_node_sync_records('contact');
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }