tripal_pub.pub_importers.inc 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906
  1. <?php
  2. /**
  3. * @file
  4. * Management of importers
  5. */
  /**
   * Builds the administrative page that lists every publication importer.
   *
   * Each row links to the edit/test form, the import submission page and the
   * delete page for one importer, and summarizes its search criteria. Viewing
   * the list also discards any importer setup state kept in the session.
   *
   * @return
   *   The HTML for the importer list page.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_importers_list() {
    // Check to make sure that the tripal_pub vocabulary is loaded. If not, then
    // warn the user that they should load it before continuing. empty() is used
    // instead of count() so that a non-array result is also treated as
    // "not loaded".
    $pub_cv = chado_select_record('cv', array('cv_id'), array('name' => 'tripal_pub'));
    if (empty($pub_cv)) {
      drupal_set_message(t('The Tripal Pub vocabulary is currently not loaded. ' .
        'This vocabulary is required to be loaded before importing of ' .
        'publications. <br>Please !import',
        array('!import' => l('load the Tripal Publication vocabulary', 'admin/tripal/loaders/obo_loader'))), 'warning');
    }

    // Clear out the session variable when we view the list.
    unset($_SESSION['tripal_pub_import']);

    $header = array('', 'Importer Name', 'Database', 'Search String', 'Disabled', 'Create Contact', '');
    $rows = array();
    $import_path = 'admin/tripal/chado/tripal_pub/import';

    $importers = db_query("SELECT * FROM {tripal_pub_import} ORDER BY name");
    while ($importer = $importers->fetchObject()) {
      // A corrupt or empty criteria column must not break the whole listing.
      $criteria = unserialize($importer->criteria);
      if (!is_array($criteria)) {
        $criteria = array();
      }
      $num_criteria = isset($criteria['num_criteria']) ? (int) $criteria['num_criteria'] : 0;

      // Build a one-line, human readable summary of the search criteria.
      $criteria_str = '';
      for ($i = 1; $i <= $num_criteria; $i++) {
        if (!isset($criteria['criteria'][$i]) || !is_array($criteria['criteria'][$i])) {
          continue;
        }
        $criterion = $criteria['criteria'][$i] + array(
          'search_terms' => '',
          'scope' => '',
          'operation' => '',
        );
        $criteria_str .= $criterion['operation'] . ' (' . $criterion['scope'] . ': ' . $criterion['search_terms'] . ') ';
      }

      $id = $importer->pub_import_id;
      $rows[] = array(
        array(
          'data' => l(t('Edit/Test'), "$import_path/edit/$id") . '<br>' .
            l(t('Import Pubs'), "$import_path/submit/$id"),
          'nowrap' => 'nowrap'
        ),
        // Table cells are emitted as raw HTML, so user supplied text must be
        // escaped before it is handed to the table theme.
        check_plain($importer->name),
        check_plain(isset($criteria['remote_db']) ? $criteria['remote_db'] : ''),
        check_plain($criteria_str),
        $importer->disabled ? 'Yes' : 'No',
        $importer->do_contact ? 'Yes' : 'No',
        l(t('Delete'), "$import_path/delete/$id"),
      );
    }

    $page = "<ul class='action-links'>";
    $page .= ' <li>' . l('New Importer', 'admin/tripal/chado/tripal_pub/import/new') . '</li>';
    $page .= '</ul>';
    $page .= '<p>' . t('A publication importer is used to create a set of search criteria that can be used to query a remote database, find publications that match the specified criteria and then import those publications into the Chado database. An example use case would be to periodically add new publications to this Tripal site that have appeared in PubMed in the last 30 days. You can import publications in one of two ways:') . '</p>';
    $page .= '<ol>';
    $page .= '<li>' . t("Create a new importer by clicking the 'New Importer' link above, and after saving it should appear in the list below. Click the link labeled 'Import Pubs' to schedule a job to import the publications") . '</li>';
    // The help link is passed as a placeholder so the translatable string stays
    // constant.
    $page .= '<li>' . t('The first method only performs the import once. However, you can schedule the importer to run periodically by adding a cron job. See the !help to learn how to set the importers to run automatically.',
      array('!help' => l("Pub Module help instructions", "admin/tripal/chado/tripal_pub/help"))) . '</li>';
    $page .= '</ol><br>';
    $page .= theme('table', array('header' => $header, 'rows' => $rows));

    return $page;
  }
  /**
   * Creates the page that contains the publication importer setup form and,
   * after the importer has been tested, the table of matching publications.
   *
   * @param $action
   *   The action to perform. Passed through to the setup form.
   * @param $pub_import_id
   *   The importer ID. Passed through to the setup form.
   *
   * @return
   *   The HTML for the importer setup page.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_importer_setup_page($action = 'new', $pub_import_id = NULL) {
    global $base_path;

    // Make sure the tripal_pub and tripal_contact ontologies are loaded.
    $tpub_cv = chado_select_record('cv', array('cv_id'), array('name' => 'tripal_pub'));
    if (empty($tpub_cv)) {
      drupal_set_message(t('Before importing publications you must first ') . l(t('load the Tripal Pub Ontology'), 'admin/tripal/tripal_cv/obo_loader'), 'error');
    }
    $tcontact_cv = chado_select_record('cv', array('cv_id'), array('name' => 'tripal_contact'));
    if (empty($tcontact_cv)) {
      drupal_set_message(t('If you want to create contact pages for authors, you must first ') . l(t('load the Tripal Contact Ontology'), 'admin/tripal/tripal_cv/obo_loader'), 'error');
    }
    if (!extension_loaded('yaz')) {
      drupal_set_message(t('<b>Note:</b> In order to create an importer using the USDA National Agricultural Library (AGL) you must install the yaz libraries. See the ') . l(t('Pub Module help page'), 'admin/tripal/chado/tripal_pub/help') . ' for assistance. If you do not want to use AGL you can ignore this warning.', 'warning');
    }

    // Generate the search form.
    $form = drupal_get_form('tripal_pub_importer_setup_form', $pub_import_id, $action);
    $output = l("Return to publication importers list", "admin/tripal/chado/tripal_pub/import_list");
    $output .= drupal_render($form);

    // Results are only shown when the form was submitted with the 'test'
    // button. Checking the flag first avoids reading session keys that may not
    // be set when no search was requested.
    if (empty($_SESSION['tripal_pub_import']['perform_search'])) {
      return $output;
    }

    $session = $_SESSION['tripal_pub_import'];
    $remote_db = isset($session['remote_db']) ? $session['remote_db'] : '';
    $num_criteria = isset($session['num_criteria']) ? $session['num_criteria'] : 0;
    $days = isset($session['days']) ? $session['days'] : '';

    // Rebuild the search description handed to the remote database query.
    $search_array = array(
      'remote_db' => $remote_db,
      'num_criteria' => $num_criteria,
      'days' => $days,
    );
    for ($i = 1; $i <= $num_criteria; $i++) {
      foreach (array('search_terms', 'scope', 'is_phrase', 'operation') as $key) {
        $search_array['criteria'][$i][$key] = isset($session['criteria'][$i][$key]) ? $session['criteria'][$i][$key] : '';
      }
    }

    // Get the list of publications from the remote database using the search
    // criteria. The page number comes from the URL, so force it to a
    // non-negative integer before using it in arithmetic.
    $limit = 25;
    $page = isset($_GET['page']) ? max(0, (int) $_GET['page']) : 0;
    $results = tripal_get_remote_pubs($remote_db, $search_array, $limit, $page);
    $total_records = isset($results['total_records']) ? (int) $results['total_records'] : 0;
    $search_str = isset($results['search_str']) ? $results['search_str'] : '';
    $pubs = (isset($results['pubs']) && is_array($results['pubs'])) ? $results['pubs'] : array();

    // Iterate through the results and construct the table displaying the
    // publications.
    $rows = array();
    $row_number = $page * $limit + 1;
    foreach ($pubs as $pub) {
      $citation = array_key_exists('Citation', $pub) ? htmlspecialchars($pub['Citation']) : 'Unable to generate citation';
      $raw_link = '';
      if (!empty($pub['Publication Dbxref'])) {
        $raw_link = l('raw', 'admin/tripal/chado/tripal_pub/import/raw/' . $pub['Publication Dbxref'], array('attributes' => array('target' => '_blank')));
      }
      $rows[] = array(
        number_format($row_number),
        $citation,
        $raw_link,
      );
      $row_number++;
    }
    if (count($rows) == 0) {
      $rows[] = array(
        array(
          'data' => 'No results found',
          'colspan' => 3,
        ),
      );
    }

    // Render through theme() rather than calling the theme functions directly.
    $table = theme('table', array(
      'header' => array('', 'Publication', 'Raw Results'),
      'rows' => $rows,
      'attributes' => array(
        'id' => 'tripal_pub-importer-test',
        'class' => array('tripal-data-table'),
      ),
      'sticky' => FALSE,
      'caption' => '',
      'colgroups' => array(),
      'empty' => '',
    ));

    // Generate the pager.
    // NOTE(review): 'quantity' is set to the per-page limit here; confirm that
    // this many pager links is what is intended.
    pager_default_initialize($total_records, $limit);
    $pager = theme('pager', array(
      'tags' => array(),
      'element' => 0,
      'parameters' => array(),
      'quantity' => $limit,
    ));
    // Because this may be an ajax callback, the pager can set the URL to be
    // "system/ajax", so we need to reset that.
    $pager = str_replace($base_path . "system/ajax", "", $pager);

    // ceil() gives the right page count when the total is an exact multiple of
    // the limit; an empty result is still reported as one page.
    $total_pages = max(1, (int) ceil($total_records / $limit));

    // Join all to form the results. The database name and search string are
    // escaped because they are built from user input.
    $output .= "$pager<br><b>Found " . number_format($total_records) . " publications. Page " . ($page + 1) . " of $total_pages.</b> " .
      "<br>" . check_plain($remote_db) . " Search String: " . check_plain($search_str) . " $table<br>$pager";

    return $output;
  }
  /**
   * The form used for creating and editing publication importers.
   *
   * Default values are resolved from the following sources, each one
   * overriding the previous:
   *   1) the saved importer record, when $action is 'edit';
   *   2) the 'tripal_pub_import' session variable, unless it belongs to a
   *      different importer, in which case it is discarded;
   *   3) $form_state['values'], which occurs on a failed validation or ajax
   *      callbacks from non submit form elements;
   *   4) $form_state['input'], which occurs on ajax callbacks from submit form
   *      elements when the form is being rebuilt.
   *
   * @param $form
   *   The Drupal form.
   * @param $form_state
   *   The form state.
   * @param $pub_import_id
   *   The publication importer ID.
   * @param $action
   *   The action to perform ('new' or 'edit').
   *
   * @return
   *   A form array.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_importer_setup_form($form, &$form_state = NULL, $pub_import_id = NULL, $action = 'new') {
    $criteria = NULL;
    $defaults = array(
      'remote_db' => '',
      'days' => '',
      'disabled' => '',
      'do_contact' => '',
      'num_criteria' => 1,
      'loader_name' => '',
    );

    // If this is an edit then we are pulling an import object from the
    // database. A missing record or unreadable criteria leaves the defaults.
    if ($action == "edit") {
      $sql = "SELECT * FROM {tripal_pub_import} WHERE pub_import_id = :pub_import_id";
      $importer = db_query($sql, array(':pub_import_id' => $pub_import_id))->fetchObject();
      $saved = $importer ? unserialize($importer->criteria) : FALSE;
      if (is_array($saved)) {
        $criteria = $saved;
        $defaults = array_intersect_key($saved, $defaults) + $defaults;
      }
    }

    // If there are any session variables then use those, but only if they
    // belong to the importer being shown. The ownership check must happen
    // before the values are copied, otherwise values of another importer
    // would leak into this form.
    if (isset($_SESSION['tripal_pub_import']) && is_array($_SESSION['tripal_pub_import'])) {
      $session = $_SESSION['tripal_pub_import'];
      $session_import_id = isset($session['pub_import_id']) ? $session['pub_import_id'] : NULL;
      if ($pub_import_id and $pub_import_id != $session_import_id) {
        unset($_SESSION['tripal_pub_import']);
      }
      else {
        $defaults = array_intersect_key($session, $defaults) + $defaults;
      }
    }

    // If we are reconstructing the form from a failed validation or ajax
    // callback then use the $form_state['values'] values.
    if (!empty($form_state['values']) && is_array($form_state['values'])) {
      $defaults = array_intersect_key($form_state['values'], $defaults) + $defaults;
    }

    // If we are rebuilding the form after submission (from ajax call) then the
    // values are in the $form_state['input'] array.
    if (!empty($form_state['input']) && is_array($form_state['input'])) {
      $input = $form_state['input'];
      foreach (array('remote_db', 'days', 'loader_name') as $key) {
        if (isset($input[$key]) && is_scalar($input[$key])) {
          $defaults[$key] = $input[$key];
        }
      }
      // An unchecked checkbox is absent from the submitted input, so a missing
      // key means "not checked".
      foreach (array('disabled', 'do_contact') as $key) {
        $defaults[$key] = isset($input[$key]) ? $input[$key] : 0;
      }
      // Because the num_criteria is a value and not a visible or hidden form
      // element it is not part of the ['input'] array, so we need to get it
      // from the form.
      if (isset($form_state['complete form']['num_criteria']['#value'])) {
        $defaults['num_criteria'] = $form_state['complete form']['num_criteria']['#value'];
      }
    }

    $remote_db = $defaults['remote_db'];
    $days = $defaults['days'];
    $disabled = $defaults['disabled'];
    $do_contact = $defaults['do_contact'];
    $num_criteria = $defaults['num_criteria'];
    $loader_name = $defaults['loader_name'];

    // The 'Add' and 'Remove' buttons change the number of criteria rows.
    $trigger = isset($form_state['triggering_element']['#name']) ? $form_state['triggering_element']['#name'] : '';
    if ($trigger == 'add') {
      $num_criteria++;
    }
    elseif ($trigger == 'remove') {
      // Never drop below a single criterion.
      $num_criteria = max(1, $num_criteria - 1);
    }

    // Set the values we need for later but that should not be shown on the
    // form.
    $form['num_criteria'] = array(
      '#type' => 'value',
      '#value' => $num_criteria,
    );
    $form['pub_import_id'] = array(
      '#type' => 'value',
      '#value' => $pub_import_id,
    );
    $form['action'] = array(
      '#type' => 'value',
      '#value' => $action,
    );

    // Add in the elements that will be organized via a theme function.
    $form['themed_element']['loader_name'] = array(
      '#type' => 'textfield',
      '#title' => t('Loader Name'),
      '#description' => t('Please provide a name for this loader setup.'),
      '#default_value' => $loader_name,
      '#required' => TRUE,
    );

    // The selectable remote databases are the supported ones that have a
    // record in the Chado db table.
    $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
    $remote_dbs = array();
    $dbs = chado_select_record('db', array('*'), array('name' => $supported_dbs));
    if (is_array($dbs)) {
      foreach ($dbs as $db) {
        $remote_dbs[$db->name] = $db->description;
      }
    }

    // Use PubMed as the default.
    if (!$remote_db) {
      $remote_db = 'PMID';
    }
    $form['themed_element']['remote_db'] = array(
      '#title' => t('Remote Database'),
      '#type' => 'select',
      '#options' => $remote_dbs,
      '#default_value' => $remote_db,
      '#ajax' => array(
        'callback' => "tripal_pubs_setup_form_ajax_update",
        'wrapper' => 'tripal-pubs-importer-setup',
        'effect' => 'fade',
        'method' => 'replace',
      ),
    );
    $form['themed_element']['days'] = array(
      '#type' => 'textfield',
      '#title' => t('Days since record modified'),
      '#description' => t('Limit the search to include pubs that have been added no more than this many days before today.'),
      '#default_value' => $days,
      '#size' => 5,
    );
    $form['themed_element']['disabled'] = array(
      '#type' => 'checkbox',
      '#title' => t('Disabled'),
      '#description' => t('Check to disable this importer.'),
      '#default_value' => $disabled,
    );
    $form['themed_element']['do_contact'] = array(
      '#type' => 'checkbox',
      '#title' => t('Create Contact'),
      '#description' => t('Check to create an entry in the contact table for each author of a matching publication during import. This allows storage of
        additional information such as affilation, etc. Otherwise, only authors names are retrieved.'),
      '#default_value' => $do_contact,
    );

    // Add in the form for the criteria.
    tripal_pub_importer_setup_add_criteria_fields($form, $form_state, $num_criteria, $criteria);

    // Add in the buttons.
    $form['save'] = array(
      '#type' => 'submit',
      '#value' => t('Save Importer'),
    );
    $form['test'] = array(
      '#type' => 'submit',
      '#value' => t('Test Importer'),
    );
    $form['delete'] = array(
      '#type' => 'submit',
      '#value' => t('Delete Importer'),
      '#attributes' => array('style' => 'float: right;')
    );

    // Add in the section where the test results will appear.
    $form['results'] = array(
      '#markup' => '<div id="tripal-pub-importer-test-section"></div>',
    );

    // Allow the selected remote database to make changes to the form if
    // needed. The callback name is derived from the selected database, so it
    // is only invoked when such a function exists, and its result only
    // replaces the form when it is an array.
    $callback = "tripal_pub_remote_alter_form_$remote_db";
    if (function_exists($callback)) {
      $altered = call_user_func($callback, $form, $form_state, $num_criteria);
      if (is_array($altered)) {
        $form = $altered;
      }
    }

    $form['themed_element']['#theme'] = 'tripal_pub_importer_setup_form_elements';

    return $form;
  }
  /**
   * A helper function for the importer setup form that adds the criteria to
   * the form that belong to the importer.
   *
   * One row of elements (scope, search terms, "is phrase" and, from the second
   * row on, the boolean operation) is added per criterion. The last row also
   * receives the 'Add' button and, if it is not the only row, the 'Remove'
   * button. The form is modified in place; nothing is returned.
   *
   * Defaults for each row are taken from $criteria, then from the
   * 'tripal_pub_import' session variable, then from $form_state['values'].
   * Sources that have no entry for a row (for example a row that was just
   * added) are skipped.
   *
   * @param $form
   *   The form, passed by reference.
   * @param $form_state
   *   The form state.
   * @param $num_criteria
   *   The number of criteria that exist for the importer.
   * @param $criteria
   *   An array containing the saved criteria, or NULL if there are none.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_importer_setup_add_criteria_fields(&$form, &$form_state, $num_criteria, $criteria) {
    // Choices arrays.
    $scope_choices = array(
      'any' => 'Any Field',
      'abstract' => 'Abstract',
      'author' => 'Author',
      'id' => 'Accession',
      'title' => 'Title',
      'journal' => 'Journal Name'
    );
    $op_choices = array(
      'AND' => 'AND',
      'OR' => 'OR',
      'NOT' => 'NOT'
    );
    $fields = array('search_terms', 'scope', 'is_phrase', 'operation');

    for ($i = 1; $i <= $num_criteria; $i++) {
      $row = array_fill_keys($fields, '');

      // If we have criteria supplied from the database then use that as the
      // initial defaults.
      if (isset($criteria['criteria'][$i]) && is_array($criteria['criteria'][$i])) {
        $row = array_intersect_key($criteria['criteria'][$i], $row) + $row;
      }

      foreach ($fields as $field) {
        // If the criteria comes from the session.
        if (isset($_SESSION['tripal_pub_import']['criteria'][$i][$field])) {
          $row[$field] = $_SESSION['tripal_pub_import']['criteria'][$i][$field];
        }
        // If the form_state has variables then use those. This happens when an
        // error occurs on the form or the form is resubmitted using AJAX.
        if (isset($form_state['values']["$field-$i"])) {
          $row[$field] = $form_state['values']["$field-$i"];
        }
      }

      $form['themed_element']['criteria'][$i]["scope-$i"] = array(
        '#type' => 'select',
        '#description' => t('Please select the fields to search for this term.'),
        '#options' => $scope_choices,
        '#default_value' => $row['scope'],
      );
      $form['themed_element']['criteria'][$i]["search_terms-$i"] = array(
        '#type' => 'textfield',
        '#description' => t('<span style="white-space: normal">Please provide a list of words for searching. You may use
          conjunctions such as "AND" or "OR" to separate words if they are expected in
          the same scope, but do not mix ANDs and ORs. Check the "Is Phrase" checkbox to use conjunctions as part of the text to search</span>'),
        '#default_value' => $row['search_terms'],
        '#required' => TRUE,
        '#maxlength' => 2048,
      );
      $form['themed_element']['criteria'][$i]["is_phrase-$i"] = array(
        '#type' => 'checkbox',
        '#title' => t('Is Phrase?'),
        '#default_value' => $row['is_phrase'],
      );

      // The first criterion has nothing to be combined with, so only later
      // rows get an operation selector.
      if ($i > 1) {
        $form['themed_element']['criteria'][$i]["operation-$i"] = array(
          '#type' => 'select',
          '#options' => $op_choices,
          '#default_value' => $row['operation'],
        );
      }

      // Only the last row carries the buttons for changing the number of rows.
      if ($i == $num_criteria) {
        if ($i > 1) {
          $form['themed_element']['criteria'][$i]["remove-$i"] = _tripal_pub_importer_setup_ajax_button('remove', t('Remove'));
        }
        $form['themed_element']['criteria'][$i]["add-$i"] = _tripal_pub_importer_setup_ajax_button('add', t('Add'));
      }
    }
  }

  /**
   * Builds the definition of an 'Add' or 'Remove' AJAX button for the criteria
   * rows of the importer setup form.
   *
   * @param $name
   *   The '#name' of the button ('add' or 'remove').
   * @param $label
   *   The already translated button label.
   *
   * @return
   *   A form element array for the button.
   */
  function _tripal_pub_importer_setup_ajax_button($name, $label) {
    return array(
      '#type' => 'button',
      '#name' => $name,
      '#value' => $label,
      '#ajax' => array(
        'callback' => "tripal_pubs_setup_form_ajax_update",
        'wrapper' => 'tripal-pubs-importer-setup',
        'effect' => 'fade',
        'method' => 'replace',
        'prevent' => 'click'
      ),
      // When this button is clicked, the form will be validated and submitted.
      // Therefore, we set custom submit and validate functions to override the
      // default form submit. In the validate function we set the form_state
      // to rebuild the form so the submit function never actually gets called,
      // but we need it or Drupal will run the default validate anyway.
      // We also set #limit_validation_errors to empty so fields that
      // are required that don't have values won't generate warnings.
      '#submit' => array('tripal_pub_setup_form_ajax_button_submit'),
      '#validate' => array('tripal_pub_setup_form_ajax_button_validate'),
      '#limit_validation_errors' => array(),
    );
  }
  /**
   * Validate handler for the 'Add' and 'Remove' AJAX buttons.
   *
   * Clicking one of those buttons submits the form, which is not wanted. The
   * buttons therefore override the form's validate and submit routines, and
   * all this handler has to do is tell Drupal to rebuild the form.
   *
   * @param $form
   *   The form. Not used.
   * @param $form_state
   *   The form state; its 'rebuild' flag is switched on.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_setup_form_ajax_button_validate($form, &$form_state) {
    // Request a rebuild instead of a regular submission.
    $form_state['rebuild'] = TRUE;
  }
  /**
   * Placeholder submit handler for the 'Add' and 'Remove' AJAX buttons.
   *
   * It exists only so that the default form submit is overridden on ajax
   * calls made through those buttons.
   *
   * @param $form
   *   The form. Not used.
   * @param $form_state
   *   The form state. Left untouched.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_setup_form_ajax_button_submit($form, &$form_state) {
    // Intentionally empty.
    return;
  }
  /**
   * Validate the tripal_pub_importer_setup_form form.
   *
   * Checks that:
   *   - search terms that are not flagged as a phrase do not mix the words
   *     'AND' and 'OR';
   *   - the number of days, if given, is a whole, non-negative number.
   * When a check fails, the 'perform_search' session flag is cleared so that
   * no test search is run. Finally the validate hook of the selected remote
   * database is invoked, if such a function exists.
   *
   * @param $form
   *   The form.
   * @param $form_state
   *   The form state.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_importer_setup_form_validate($form, &$form_state) {
    $values = $form_state['values'];
    $num_criteria = $values['num_criteria'];
    $remote_db = $values["remote_db"];
    $days = trim($values["days"]);

    for ($i = 1; $i <= $num_criteria; $i++) {
      $search_terms = isset($values["search_terms-$i"]) ? trim($values["search_terms-$i"]) : '';
      if (!empty($values["is_phrase-$i"])) {
        // In a phrase the conjunctions are part of the text to search.
        continue;
      }
      // Word boundaries keep terms that merely contain the letters, such as
      // 'sandy' or 'sorghum', from being mistaken for conjunctions.
      if (preg_match('/\band\b/i', $search_terms) and preg_match('/\bor\b/i', $search_terms)) {
        form_set_error("search_terms-$i", "You may use 'AND' or 'OR' but cannot use both. Add a new entry below with the same scope for the other conjunction.");
        $_SESSION['tripal_pub_import']['perform_search'] = 0;
      }
    }

    // Only digits are accepted, which rules out decimals, negative numbers and
    // exponent notation. An empty value means no limit was given.
    if ($days !== '' and !preg_match('/^\d+$/', $days)) {
      form_set_error("days", "Please enter a numeric, non decimal value, for the number of days.");
      $_SESSION['tripal_pub_import']['perform_search'] = 0;
    }

    // Allow the selected remote database to validate any changes to the form
    // if needed. Skipped when no such function exists for the database.
    $callback = "tripal_pub_remote_validate_form_$remote_db";
    if (function_exists($callback)) {
      call_user_func($callback, $form, $form_state);
    }
  }
  582. /**
  583. * Submit the tripal_pub_importer_setup_form form
  584. *
  585. * @ingroup tripal_pub
  586. */
  587. function tripal_pub_importer_setup_form_submit($form, &$form_state) {
  588. $pub_import_id = $form_state['values']['pub_import_id'];
  589. $num_criteria = $form_state['values']['num_criteria'];
  590. $remote_db = $form_state['values']["remote_db"];
  591. $days = trim($form_state['values']["days"]);
  592. $loader_name = trim($form_state['values']["loader_name"]);
  593. $disabled = $form_state['values']["disabled"];
  594. $do_contact = $form_state['values']["do_contact"];
  595. // set the session variables
  596. $_SESSION['tripal_pub_import']['remote_db'] = $remote_db;
  597. $_SESSION['tripal_pub_import']['days'] = $days;
  598. $_SESSION['tripal_pub_import']['num_criteria'] = $num_criteria;
  599. $_SESSION['tripal_pub_import']['loader_name'] = $loader_name;
  600. $_SESSION['tripal_pub_import']['disabled'] = $disabled;
  601. $_SESSION['tripal_pub_import']['do_contact'] = $do_contact;
  602. $_SESSION['tripal_pub_import']['pub_import_id'] = $pub_import_id;
  603. unset($_SESSION['tripal_pub_import']['criteria']);
  604. for ($i = 1; $i <= $num_criteria; $i++) {
  605. $search_terms = trim($form_state['values']["search_terms-$i"]);
  606. $scope = $form_state['values']["scope-$i"];
  607. $is_phrase = $form_state['values']["is_phrase-$i"];
  608. $operation = '';
  609. if ($i > 1) {
  610. $operation = $form_state['values']["operation-$i"];
  611. }
  612. $_SESSION['tripal_pub_import']['criteria'][$i] = array(
  613. 'search_terms' => $search_terms,
  614. 'scope' => $scope,
  615. 'is_phrase' => $is_phrase,
  616. 'operation' => $operation
  617. );
  618. }
  619. // now perform the appropriate action for the button clicked
  620. if ($form_state['values']['op'] == 'Test Importer') {
  621. $_SESSION['tripal_pub_import']['perform_search'] = 1;
  622. }
  623. if ($form_state['values']['op'] == 'Save Importer' or
  624. $form_state['values']['op'] == 'Save & Import Now') {
  625. $record = array(
  626. 'name' => $loader_name,
  627. 'criteria' => serialize($_SESSION['tripal_pub_import']),
  628. 'disabled' => $disabled,
  629. 'do_contact' => $do_contact
  630. );
  631. // first check to see if this pub_import_id is already present. If so,
  632. // do an update rather than an insert
  633. $sql = "SELECT * FROM {tripal_pub_import} WHERE pub_import_id = :pub_import_id";
  634. $importer = db_query($sql, array(':pub_import_id' => $pub_import_id))->fetchObject();
  635. if($importer) {
  636. // do the update
  637. $record['pub_import_id'] = $pub_import_id;
  638. if(drupal_write_record('tripal_pub_import', $record, 'pub_import_id')){
  639. unset($_SESSION['tripal_pub_import']);
  640. drupal_set_message('Publication import settings updated.');
  641. drupal_goto('admin/tripal/chado/tripal_pub/import_list');
  642. }
  643. else {
  644. drupal_set_message('Could not update publication import settings.', 'error');
  645. }
  646. }
  647. else {
  648. // do the insert
  649. if(drupal_write_record('tripal_pub_import', $record)){
  650. unset($_SESSION['tripal_pub_import']);
  651. drupal_set_message('Publication import settings saved.');
  652. // if the user wants to do the import now then do it (may time out
  653. // for long jobs)
  654. if ($form_state['values']['op'] == 'Save & Import Now') {
  655. tripal_execute_pub_importer($record['pub_import_id']);
  656. }
  657. drupal_goto('admin/tripal/chado/tripal_pub/import_list');
  658. }
  659. else {
  660. drupal_set_message('Could not save publication import settings.', 'error');
  661. }
  662. }
  663. }
  664. if ($form_state['values']['op'] == 'Delete Importer') {
  665. $sql = "DELETE FROM {tripal_pub_import} WHERE pub_import_id = :pub_import_id";
  666. $success = db_query($sql, array(':pub_import_id' => $pub_import_id));
  667. if ($success) {
  668. drupal_set_message('Publication importer deleted.');
  669. drupal_goto('admin/tripal/chado/tripal_pub/import_list');
  670. }
  671. else {
  672. drupal_set_message('Could not delete publication importer.', 'error');
  673. }
  674. }
  675. }
  676. /**
  677. * AJAX callback for updating the form.
  678. *
  679. * @ingroup tripal_pub
  680. */
  681. function tripal_pubs_setup_form_ajax_update($form, $form_state) {
  682. return $form['themed_element'];
  683. }
  684. /**
  685. * Theme the tripal_pub_importer_setup_form form.
  686. *
  687. * @ingroup tripal_pub
  688. */
  689. function theme_tripal_pub_importer_setup_form_elements($variables) {
  690. $form = $variables['form'];
  691. // first render the fields at the top of the form
  692. $markup = '';
  693. $markup .= '<div id="pub-search-form-row0">';
  694. $markup .= ' <div id="pub-search-form-row0-col1" style="float: left">' . drupal_render($form['remote_db']) . '</div>';
  695. $markup .= ' <div id="pub-search-form-row0-col2" style="float: left; margin-left: 10px">' . drupal_render($form['loader_name']) . '</div>';
  696. $markup .= '</div>';
  697. $markup .= '<div id="pub-search-form-row1" style="clear:both">';
  698. $markup .= ' <div id="pub-search-form-row1-col1">' . drupal_render($form['days']) . '</div>';
  699. $markup .= '</div>';
  700. $markup .= '<div id="pub-search-form-row2">' . drupal_render($form['disabled']) . '</div>';
  701. $markup .= '<div id="pub-search-form-row3">' . drupal_render($form['do_contact']) . '</div>';
  702. // next render the criteria fields into a table format
  703. $rows = array();
  704. foreach ($form['criteria'] as $i => $element) {
  705. if(is_numeric($i)) {
  706. $rows[] = array(
  707. drupal_render($element["operation-$i"]),
  708. drupal_render($element["scope-$i"]),
  709. drupal_render($element["search_terms-$i"]),
  710. drupal_render($element["is_phrase-$i"]),
  711. drupal_render($element["add-$i"]) . drupal_render($element["remove-$i"]),
  712. );
  713. }
  714. }
  715. $headers = array('Operation','Scope', 'Search Terms', '','');
  716. $table = array(
  717. 'header' => $headers,
  718. 'rows' => $rows,
  719. 'attributes' => array(
  720. 'class' => array('tripal-data-table')
  721. ),
  722. 'sticky' => TRUE,
  723. 'caption' => '',
  724. 'colgroups' => array(),
  725. 'empty' => '',
  726. );
  727. $criteria_table = theme_table($table);
  728. $markup .= $criteria_table;
  729. // add the rendered form
  730. $form = array(
  731. '#markup' => $markup,
  732. '#prefix' => '<div id="tripal-pubs-importer-setup">',
  733. '#suffix' => '</div>',
  734. );
  735. return drupal_render($form);
  736. }
  737. /**
  738. * Add a job to import publications
  739. *
  740. * @param $pub_importer_id
  741. * The id of the importer to submit a job to update
  742. *
  743. * @ingroup tripal_pub
  744. */
  745. function tripal_pub_importer_submit_job($import_id) {
  746. global $user;
  747. // get all of the loaders
  748. $args = array(':import_id' => $import_id);
  749. $sql = "SELECT * FROM {tripal_pub_import} WHERE pub_import_id = :import_id ";
  750. $import = db_query($sql, $args)->fetchObject();
  751. $args = array($import_id);
  752. tripal_add_job("Import publications $import->name", 'tripal_pub',
  753. 'tripal_execute_pub_importer', $args, $user->uid);
  754. drupal_goto('admin/tripal/chado/tripal_pub/import_list');
  755. }
  756. /**
  757. * Deletes a publication importer.
  758. *
  759. */
  760. function tripal_pub_importer_delete($import_id) {
  761. $args = array(':import_id' => $import_id);
  762. $sql = "DELETE FROM {tripal_pub_import} WHERE pub_import_id = :import_id";
  763. $success = db_query($sql, $args);
  764. if ($success) {
  765. drupal_set_message('Publication importer deleted.');
  766. drupal_goto('admin/tripal/chado/tripal_pub/import_list');
  767. }
  768. else {
  769. drupal_set_message('Could not delete publication importer.', 'error');
  770. }
  771. }
  772. /**
  773. * Adds publications that have been retrieved from a remote database and
  774. * consolidated into an array of details.
  775. *
  776. * @param $pubs
  777. * An array containing a list of publications to add to Chado. The
  778. * array contains a set of details for the publication.
  779. * @param $do_contact
  780. * Set to TRUE if authors should automatically have a contact record added
  781. * to Chado.
  782. * @param $update
  783. * If set to TRUE then publications that already exist in the Chado database
  784. * will be updated, whereas if FALSE only new publications will be added
  785. *
  786. * @return
  787. * Returns an array containing the number of publications that were
  788. * inserted, updated, skipped and which had an error during import.
  789. *
  790. * @ingroup tripal_pub
  791. */
  792. function tripal_pub_add_publications($pubs, $do_contact, $update = FALSE) {
  793. $report = array();
  794. $report['error'] = 0;
  795. $report['inserted'] = array();
  796. $report['skipped'] = array();
  797. $total_pubs = count($pubs);
  798. // iterate through the publications and add each one
  799. $i = 1;
  800. foreach ($pubs as $pub) {
  801. $memory = number_format(memory_get_usage()) . " bytes";
  802. print "Processing $i of $total_pubs. Memory usage: $memory.\r";
  803. // add the publication to Chado
  804. $action = '';
  805. $pub_id = tripal_pub_add_publication($pub, $action, $do_contact, $update);
  806. if ($pub_id){
  807. // add the publication cross reference (e.g. to PubMed)
  808. if ($pub_id and $pub['Publication Dbxref']) {
  809. $dbxref = array();
  810. if (preg_match('/^(.*?):(.*?)$/', trim($pub['Publication Dbxref']), $matches)) {
  811. $dbxref['db_name'] = $matches[1];
  812. $dbxref['accession'] = $matches[2];
  813. }
  814. else {
  815. tripal_report_error(
  816. 'tripal_pub',
  817. TRIPAL_ERROR,
  818. 'Unable to extract the dbxref to be associated with the publication (pub ID=@pub_id) from @dbxref. This reference should be [database-name]:[accession]',
  819. array('@pub_id' => $pub_id, '@dbxref' => $pub['Publication Dbxref'])
  820. );
  821. }
  822. $pub_dbxref = tripal_associate_dbxref('pub', $pub_id, $dbxref);
  823. }
  824. $pub['pub_id'] = $pub_id;
  825. }
  826. switch ($action) {
  827. case 'error':
  828. $report['error']++;
  829. break;
  830. case 'inserted':
  831. $report['inserted'][] = $pub;
  832. break;
  833. case 'updated':
  834. $report['updated'][] = $pub;
  835. break;
  836. case 'skipped':
  837. $report['skipped'][] = $pub;
  838. break;
  839. }
  840. $i++;
  841. }
  842. print "\n";
  843. return $report;
  844. }
  845. /**
  846. * Adds a new publication to the Chado, along with all properties and
  847. * database cross-references. If the publication does not already exist
  848. * in Chado then it is added. If it does exist nothing is done. If
  849. * the $update parameter is TRUE then the publication is updated if it exists.
  850. *
  851. * @param $pub_details
  852. * An associative array containing all of the details about the publication.
  853. * @param $action
  854. * This variable will get set to a text value indicating the action that was
  855. * performed. The values include 'skipped', 'inserted', 'updated' or 'error'.
  856. * @param $do_contact
  857. * Optional. Set to TRUE if a contact entry should be added to the Chado contact table
  858. * for authors of the publication.
  859. * @param $update_if_exists
  860. * Optional. If the publication already exists then this function will return
  861. * without adding a new publication. However, set this value to TRUE to force
  862. * the function to pudate the publication using the $pub_details that are provided.
  863. *
  864. * @return
  865. * If the publication already exists, is inserted or updated then the publication
  866. * ID is returned, otherwise FALSE is returned. If the publication already exists
  867. * and $update_if_exists is not TRUE then the $action variable is set to 'skipped'.
  868. * If the publication already exists and $update_if_exists is TRUE and if the update
  869. * was successful then $action is set to 'updated'. Otherwise on successful insert
  870. * the $action variable is set to 'inserted'. If the function failes then the
  871. * $action variable is set to 'error'
  872. *
  873. * @ingroup tripal_pub
  874. */
  875. function tripal_pub_add_publication($pub_details, &$action, $do_contact = FALSE, $update_if_exists = FALSE) {
  876. $pub_id = 0;
  877. if (!is_array($pub_details)) {
  878. return FALSE;
  879. }
  880. // before proceeding check to see if the publication already exists. If there is only one match
  881. // and the $update_if_exists is NOT set then return FALSE
  882. $pub_ids = tripal_publication_exists($pub_details);
  883. if(count($pub_ids) == 1 and !$update_if_exists) {
  884. tripal_report_error('tripal_pub', TRIPAL_NOTICE,
  885. "There is a publication that is a duplicate of this publication. Cannot continue. It either ".
  886. "has a matching Dbxref (e.g. PubMed ID), a non-unique citation or matches on the unique " .
  887. "constraint set by the Tripal publication module configuration page. \nCitation: %title %dbxref.\nMatching Pub id: %ids",
  888. array(
  889. '%title' => $pub_details['Citation'],
  890. '%dbxref' => $pub_details['Publication Dbxref'],
  891. '%ids' => implode(",", $pub_ids),
  892. )
  893. );
  894. $action = 'skipped';
  895. return FALSE;
  896. }
  897. // if we have more than one matching pub then return an error as we don't know which to update even if
  898. // update_if_exists is set to TRUE
  899. if(count($pub_ids) > 1) {
  900. tripal_report_error('tripal_pub', TRIPAL_NOTICE,
  901. "There are %num publications that are duplicates of this publication. They either " .
  902. "have a matching Dbxref (e.g. PubMed ID) or match on the unique constraint set by the Tripal publication module ".
  903. "configuration page. \nCitation: %title %dbxref.\nMatching Pub ids: %ids",
  904. array(
  905. '%num' => count($pub_ids),
  906. '%title' => $pub_details['Citation'],
  907. '%dbxref' => $pub_details['Publication Dbxref'],
  908. '%ids' => implode(",", $pub_ids),
  909. )
  910. );
  911. $action = 'skipped';
  912. return FALSE;
  913. }
  914. if(count($pub_ids) == 1 and $update_if_exists) {
  915. $pub_id = $pub_ids[0];
  916. }
  917. // get the publication type (use the first publication type)
  918. if (array_key_exists('Publication Type', $pub_details)) {
  919. $pub_type = '';
  920. if(is_array($pub_details['Publication Type'])) {
  921. $pub_type = $pub_details['Publication Type'][0];
  922. }
  923. else {
  924. $pub_type = $pub_details['Publication Type'];
  925. }
  926. $identifiers = array(
  927. 'name' => $pub_type,
  928. 'cv_id' => array(
  929. 'name' => 'tripal_pub'
  930. ),
  931. );
  932. $pub_type = tripal_get_cvterm($identifiers);
  933. }
  934. else {
  935. tripal_report_error('tripal_pub', TRIPAL_ERROR,
  936. "The Publication Type is a required property but is missing", array());
  937. $action = 'error';
  938. return FALSE;
  939. }
  940. if (!$pub_type) {
  941. tripal_report_error('tripal_pub', TRIPAL_ERROR, "Cannot find publication type: '%type'",
  942. array('%type' => $pub_details['Publication Type'][0]));
  943. $action = 'error';
  944. return FALSE;
  945. }
  946. // the series name field in the pub table is only 255 characters, so we should trim just in case
  947. $series_name = '';
  948. if (array_key_exists('Series_Name', $pub_details)) {
  949. $series_name = substr($pub_details['Series Name'], 0, 255);
  950. }
  951. if (array_key_exists('Journal Name', $pub_details)) {
  952. $series_name = substr($pub_details['Journal Name'], 0, 255);
  953. }
  954. // build the values array for inserting or updating
  955. $values = array(
  956. 'title' => $pub_details['Title'],
  957. 'volume' => (isset($pub_details['Volume'])) ? $pub_details['Volume'] : '',
  958. 'series_name' => $series_name,
  959. 'issue' => (isset($pub_details['Issue'])) ? $pub_details['Issue'] : '',
  960. 'pyear' => (isset($pub_details['Year'])) ? $pub_details['Year'] : '',
  961. 'pages' => (isset($pub_details['Pages'])) ? $pub_details['Pages'] : '',
  962. 'uniquename' => $pub_details['Citation'],
  963. 'type_id' => $pub_type->cvterm_id,
  964. );
  965. // if there is no pub_id then we need to do an insert.
  966. if (!$pub_id) {
  967. $options = array('statement_name' => 'ins_pub_tivoseispypaunty');
  968. $pub = chado_insert_record('pub', $values, $options);
  969. if (!$pub) {
  970. tripal_report_error('tripal_pub', TRIPAL_ERROR, "Cannot insert the publication with title: %title",
  971. array('%title' => $pub_details['Title']));
  972. $action = 'error';
  973. return FALSE;
  974. }
  975. $pub_id = $pub['pub_id'];
  976. $action = 'inserted';
  977. }
  978. // if there is a pub_id and we've been told to update, then do the update
  979. if ($pub_id and $update_if_exists) {
  980. $match = array('pub_id' => $pub_id);
  981. $options = array('statement_name' => 'up_pub_tivoseispypaunty');
  982. $success = chado_update_record('pub', $match, $values, $options);
  983. if (!$success) {
  984. tripal_report_error('tripal_pub', TRIPAL_ERROR, "Cannot update the publication with title: %title",
  985. array('%title' => $pub_details['Title']));
  986. $action = 'error';
  987. return FALSE;
  988. }
  989. $action = 'updated';
  990. }
  991. // before we add any new properties we need to remove those that are there if this
  992. // is an update. The only thing we don't want to remove are the 'Publication Dbxref'
  993. if ($update_if_exists) {
  994. $sql = "
  995. DELETE FROM {pubprop}
  996. WHERE
  997. pub_id = :pub_id AND
  998. NOT type_id in (
  999. SELECT cvterm_id
  1000. FROM {cvterm}
  1001. WHERE name = 'Publication Dbxref'
  1002. )
  1003. ";
  1004. chado_query($sql, array(':pub_id' => $pub_id));
  1005. }
  1006. // iterate through the properties and add them
  1007. foreach ($pub_details as $key => $value) {
  1008. // the pub_details may have the raw search data (e.g. in XML from PubMed. We'll irgnore this for now
  1009. if($key == 'raw') {
  1010. continue;
  1011. }
  1012. // get the cvterm by name
  1013. $identifiers = array(
  1014. 'name' => $key,
  1015. 'cv_id' => array(
  1016. 'name' => 'tripal_pub'
  1017. ),
  1018. );
  1019. $cvterm = tripal_get_cvterm($identifiers);
  1020. // if we could not find the cvterm by name then try by synonym
  1021. //$cvterm = tripal_get_cvterm(array('name' => $key, 'cv_id' => array('name' => 'tripal_pub')));
  1022. if (!$cvterm) {
  1023. $identifiers = array(
  1024. 'synonym' => array(
  1025. 'name' => $key,
  1026. 'cv_name' => 'tripal_pub'
  1027. )
  1028. );
  1029. $cvterm = tripal_get_cvterm($identifiers);
  1030. }
  1031. if (!$cvterm) {
  1032. tripal_report_error('tripal_pub', TRIPAL_ERROR, "Cannot find term: '%prop'. Skipping.", array('%prop' => $key));
  1033. continue;
  1034. }
  1035. // skip details that won't be stored as properties
  1036. if ($key == 'Author List') {
  1037. tripal_pub_add_authors($pub_id, $value, $do_contact);
  1038. continue;
  1039. }
  1040. if ($key == 'Title' or $key == 'Volume' or $key == 'Journal Name' or $key == 'Issue' or
  1041. $key == 'Year' or $key == 'Pages') {
  1042. continue;
  1043. }
  1044. $success = 0;
  1045. if (is_array($value)) {
  1046. foreach ($value as $subkey => $subvalue) {
  1047. // if the key is an integer then this array is a simple list and
  1048. // we will insert using the primary key. Otheriwse, use the new key
  1049. if(is_int($subkey)) {
  1050. $success = chado_insert_property(
  1051. array('table' => 'pub', 'id' => $pub_id),
  1052. array('type_name' => $key, 'cv_name' => 'tripal_pub', 'value' => $subvalue)
  1053. );
  1054. }
  1055. else {
  1056. $success = chado_insert_property(
  1057. array('table' => 'pub', 'id' => $pub_id),
  1058. array('type_name' => $subkey, 'cv_name' => 'tripal_pub', 'value' => $subvalue)
  1059. );
  1060. }
  1061. }
  1062. }
  1063. else {
  1064. $success = chado_insert_property(
  1065. array('table' => 'pub', 'id' => $pub_id),
  1066. array('type_name' => $key, 'cv_name' => 'tripal_pub', 'value' => $value),
  1067. array('update_if_present' => TRUE)
  1068. );
  1069. }
  1070. if (!$success) {
  1071. tripal_report_error('tripal_pub', TRIPAL_ERROR, "Cannot add property '%prop' to publication. Skipping.",
  1072. array('%prop' => $key));
  1073. continue;
  1074. }
  1075. }
  1076. return $pub_id;
  1077. }
  1078. /**
  1079. * Add one or more authors to a publication
  1080. *
  1081. * @param $pub_id
  1082. * The publication ID of the pub in Chado.
  1083. * @param $authors
  1084. * An array of authors. Each author should have a set of keys/value pairs
  1085. * describing the author.
  1086. * @param $do_contact
  1087. * Optional. Set to TRUE if a contact entry should be added to the Chado contact table
  1088. * for authors of the publication.
  1089. * @ingroup tripal_pub
  1090. */
  1091. function tripal_pub_add_authors($pub_id, $authors, $do_contact) {
  1092. $rank = 0;
  1093. // first remove any of the existing pubauthor entires
  1094. $sql = "DELETE FROM {pubauthor} WHERE pub_id = :pub_id";
  1095. chado_query($sql, array(':pub_id' => $pub_id));
  1096. // iterate through the authors and add them to the pubauthors and contact
  1097. // tables of chado, then link them through the custom pubauthors_contact table
  1098. foreach ($authors as $author) {
  1099. // skip invalid author entires
  1100. if (isset($author['valid']) AND $author['valid'] == 'N') {
  1101. continue;
  1102. }
  1103. // remove the 'valid' property as we don't have a CV term for it
  1104. unset($author['valid']);
  1105. // construct the contact.name field using the author information
  1106. $name = '';
  1107. $type = 'Person';
  1108. if (isset($author['Given Name'])) {
  1109. $name .= $author['Given Name'];
  1110. }
  1111. if (isset($author['Surname'])) {
  1112. $name .= ' ' . $author['Surname'];
  1113. }
  1114. if (isset($author['Suffix'])) {
  1115. $name .= ' ' . $author['Suffix'];
  1116. }
  1117. if (isset($author['Collective'])) {
  1118. $name = $author['Collective'];
  1119. $type = 'Collective';
  1120. }
  1121. $name = trim($name);
  1122. // add an entry to the pubauthors table
  1123. $values = array(
  1124. 'pub_id' => $pub_id,
  1125. 'rank' => $rank,
  1126. 'surname' => $author['Surname'] ? substr($author['Surname'], 0, 100) : substr($author['Collective'], 0, 100),
  1127. 'givennames' => $author['Given Name'],
  1128. 'suffix' => $author['Suffix'],
  1129. );
  1130. $options = array('statement_name' => 'ins_pubauthor_idrasugisu');
  1131. $pubauthor = chado_insert_record('pubauthor', $values, $options);
  1132. // if the user wants us to create a contact for each author then do it.
  1133. if ($do_contact) {
  1134. // Add the contact
  1135. $contact = tripal_insert_contact(array(
  1136. 'name' => $name,
  1137. 'description' => '',
  1138. 'type_name' => $type,
  1139. 'properties' => $author
  1140. ));
  1141. // if we have succesfully added the contact and the pubauthor entries then we want to
  1142. // link them together
  1143. if ($contact and $pubauthor) {
  1144. // link the pubauthor entry to the contact
  1145. $values = array(
  1146. 'pubauthor_id' => $pubauthor['pubauthor_id'],
  1147. 'contact_id' => $contact['contact_id'],
  1148. );
  1149. $options = array('statement_name' => 'ins_pubauthorcontact_puco');
  1150. $pubauthor_contact = chado_insert_record('pubauthor_contact', $values, $options);
  1151. if (!$pubauthor_contact) {
  1152. tripal_report_error('tripal_pub', TRIPAL_ERROR, "Cannot link pub authro and contact.", array());
  1153. }
  1154. }
  1155. }
  1156. $rank++;
  1157. }
  1158. }
  1159. /**
  1160. * This function generates an array suitable for use with the
  1161. * tripal_pub_create_citation function for any publication
  1162. * already stored in the Chado tables.
  1163. *
  1164. * @param $pub_id
  1165. * The publication ID
  1166. * @param $skip_existing
  1167. * Set to TRUE to skip publications that already have a citation
  1168. * in the pubprop table. Set to FALSE to generate a citation
  1169. * regardless if the citation already exists.
  1170. *
  1171. * @return
  1172. * An array suitable for the trpial_pub_create_citation function. On
  1173. * failure returns FALSE.
  1174. *
  1175. * @ingroup tripal_pub
  1176. */
  1177. function tripal_pub_get_publication_array($pub_id, $skip_existing = TRUE) {
  1178. $options = array('return_array' => 1);
  1179. // ---------------------------------
  1180. // get the publication
  1181. // ---------------------------------
  1182. $values = array('pub_id' => $pub_id);
  1183. $pub = chado_generate_var('pub', $values);
  1184. // expand the title
  1185. $pub = chado_expand_var($pub, 'field', 'pub.title');
  1186. $pub = chado_expand_var($pub, 'field', 'pub.volumetitle');
  1187. $pub = chado_expand_var($pub, 'field', 'pub.uniquename');
  1188. $pub_array = array();
  1189. if (trim($pub->title)) {
  1190. $pub_array['Title'] = $pub->title;
  1191. }
  1192. if (trim($pub->volumetitle)) {
  1193. $pub_array['Volume Title'] = $pub->volumetitle;
  1194. }
  1195. if (trim($pub->volume)) {
  1196. $pub_array['Volume'] = $pub->volume;
  1197. }
  1198. if (trim($pub->series_name)) {
  1199. $pub_array['Series Name'] = $pub->series_name;
  1200. }
  1201. if (trim($pub->issue)) {
  1202. $pub_array['Issue'] = $pub->issue;
  1203. }
  1204. if (trim($pub->pyear)) {
  1205. $pub_array['Year'] = $pub->pyear;
  1206. }
  1207. if (trim($pub->pages)) {
  1208. $pub_array['Pages'] = $pub->pages;
  1209. }
  1210. if (trim($pub->miniref)) {
  1211. $pub_array['Mini Ref'] = $pub->miniref;
  1212. }
  1213. if (trim($pub->uniquename)) {
  1214. $pub_array['Uniquename'] = $pub->uniquename;
  1215. }
  1216. $pub_array['Publication Type'][] = $pub->type_id->name;
  1217. // ---------------------------------
  1218. // get the citation
  1219. // ---------------------------------
  1220. $values = array(
  1221. 'pub_id' => $pub->pub_id,
  1222. 'type_id' => array(
  1223. 'name' => 'Citation',
  1224. ),
  1225. );
  1226. $citation = chado_generate_var('pubprop', $values);
  1227. if ($citation) {
  1228. $citation = chado_expand_var($citation, 'field', 'pubprop.value', $options);
  1229. if (count($citation) > 1) {
  1230. tripal_report_error('tripal_pub', TRIPAL_ERROR, "Publication has multiple citations already: %pub_id",
  1231. array('%pub_id' => $pubid));
  1232. return FALSE;
  1233. }
  1234. elseif (count($citation) == 1 and $skip_existing == TRUE) {
  1235. // skip this publication, it already has a citation
  1236. return FALSE;
  1237. }
  1238. }
  1239. // ---------------------------------
  1240. // get the publication types
  1241. // ---------------------------------
  1242. $values = array(
  1243. 'pub_id' => $pub->pub_id,
  1244. 'type_id' => array(
  1245. 'name' => 'Publication Type',
  1246. ),
  1247. );
  1248. $ptypes = chado_generate_var('pubprop', $values, $options);
  1249. if ($ptypes) {
  1250. $ptypes = chado_expand_var($ptypes, 'field', 'pubprop.value', $options);
  1251. foreach ($ptypes as $ptype) {
  1252. $pub_array['Publication Type'][] = $ptype->value;
  1253. }
  1254. }
  1255. // ---------------------------------
  1256. // get the authors list
  1257. // ---------------------------------
  1258. $values = array(
  1259. 'pub_id' => $pub->pub_id,
  1260. 'type_id' => array(
  1261. 'name' => 'Authors',
  1262. ),
  1263. );
  1264. $authors = chado_generate_var('pubprop', $values);
  1265. $authors = chado_expand_var($authors, 'field', 'pubprop.value', $options);
  1266. if (count($authors) > 1) {
  1267. tripal_report_error('tripal_pub', TRIPAL_ERROR, "Publication has multiple author lists. It should have only one list: %pub_id",
  1268. array('%pub_id' => $pubid));
  1269. return FALSE;
  1270. }
  1271. else if (trim($authors->value)) {
  1272. $pub_array['Authors'] = $authors->value;
  1273. }
  1274. // if there is no 'Author's property then try to retreive authors from the pubauthor table
  1275. else {
  1276. $sql = "
  1277. SELECT string_agg(surname || ' ' || givennames, ', ')
  1278. FROM {pubauthor}
  1279. WHERE pub_id = :pub_id
  1280. GROUP BY pub_id
  1281. ";
  1282. $au = chado_query($sql, array(':pub_id' => $pub_id))->fetchField();
  1283. if ($au) {
  1284. $pub_array['Authors'] = $au;
  1285. }
  1286. }
  1287. //Get other props
  1288. $props = array(
  1289. 'Journal Abbreviation',
  1290. 'Elocation',
  1291. 'Media Code',
  1292. 'Conference Name',
  1293. 'Keywords',
  1294. 'Series Name',
  1295. 'pISSN',
  1296. 'Publication Date',
  1297. 'Journal Code',
  1298. 'Journal Alias',
  1299. 'Journal Country',
  1300. 'Published Location',
  1301. 'Publication Model',
  1302. 'Language Abbr',
  1303. 'Alias',
  1304. 'Publication Dbxref',
  1305. 'Copyright',
  1306. 'Abstract',
  1307. 'Notes',
  1308. 'Citation',
  1309. 'Language',
  1310. 'URL',
  1311. 'eISSN',
  1312. 'DOI',
  1313. 'ISSN',
  1314. 'Publication Code',
  1315. 'Comments',
  1316. 'Publisher',
  1317. 'Media Alias',
  1318. 'Original Title'
  1319. );
  1320. foreach ($props AS $prop) {
  1321. $sql =
  1322. "SELECT value FROM {pubprop}
  1323. WHERE type_id =
  1324. (SELECT cvterm_id
  1325. FROM {cvterm}
  1326. WHERE name = :cvtname AND cv_id =
  1327. (SELECT cv_id
  1328. FROM {cv}
  1329. WHERE name = 'tripal_pub'
  1330. )
  1331. )
  1332. AND pub_id = :pub_id
  1333. ";
  1334. $val = trim(chado_query($sql, array(':cvtname' => $prop, ':pub_id' => $pub->pub_id))->fetchField());
  1335. if ($val) {
  1336. $pub_array[$prop] =$val;
  1337. }
  1338. }
  1339. return $pub_array;
  1340. }
  /**
   * Imports all publications for all active import setups.
   *
   * Every row of the tripal_pub_import table that is not disabled is executed
   * in turn. Records are requested from the remote database in pages of 100 and
   * handed to tripal_pub_add_publications(). The reports returned for each page
   * are accumulated per importer so that the optional email report covers every
   * page, not only the last one retrieved.
   *
   * The whole run is wrapped in a single database transaction which is rolled
   * back if an exception is thrown.
   *
   * @param $report_email
   *   A list of email address, separated by commas, that should be notified
   *   once importing has completed. If FALSE no report is sent.
   * @param $do_update
   *   If set to TRUE then publications that already exist in the Chado database
   *   will be updated, whereas if FALSE only new publications will be added
   *
   * @ingroup tripal_pub
   */
  function tripal_execute_active_pub_importers($report_email = FALSE, $do_update = FALSE) {
    $num_to_retrieve = 100;

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";

    // start the transaction
    $transaction = db_transaction();

    try {
      // get all of the loaders
      $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
      $importers = db_query($sql, array());
      $do_contact = FALSE;
      $reports = array();

      foreach ($importers as $import) {
        $page = 0;
        print "Executing importer: '" . $import->name . "'\n";

        // keep track if any of the importers want to create contacts from authors
        if ($import->do_contact == 1) {
          $do_contact = TRUE;
        }

        $criteria = unserialize($import->criteria);
        $remote_db = $criteria['remote_db'];

        // The report for this importer, accumulated over all retrieved pages.
        $importer_report = array();
        do {
          // retrieve the pubs for this page. We'll retrieve 100 at a time
          $page_results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
          $pubs = $page_results['pubs'];
          $page_report = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);

          // Fold this page's report into the importer's report: lists are
          // appended, numeric counters are summed and anything else takes
          // the most recent value.
          if (is_array($page_report)) {
            foreach ($page_report as $key => $value) {
              if (!array_key_exists($key, $importer_report)) {
                $importer_report[$key] = $value;
              }
              elseif (is_array($value) && is_array($importer_report[$key])) {
                $importer_report[$key] = array_merge($importer_report[$key], $value);
              }
              elseif (is_numeric($value) && is_numeric($importer_report[$key])) {
                $importer_report[$key] += $value;
              }
              else {
                $importer_report[$key] = $value;
              }
            }
          }
          $page++;
        }
        // continue looping until we have a $pubs array that does not have
        // our requested number of records. This means we've hit the end
        while (count($pubs) == $num_to_retrieve);

        $reports[$import->name] = $importer_report;
      }

      // sync the newly added publications with Drupal.
      print "Syncing publications with Drupal...\n";
      chado_node_sync_records('pub');

      // Iterate through each of the reports and generate a final report with
      // HTML links.
      $HTML_report = '';
      if ($report_email) {
        $HTML_report .= "<html>";
        global $base_url;
        foreach ($reports as $importer => $report) {
          $inserted = (isset($report['inserted']) && is_array($report['inserted'])) ? $report['inserted'] : array();
          $total = count($inserted);
          $HTML_report .= "<b>$total new publications from importer: $importer</b><br><ol>\n";
          foreach ($inserted as $pub) {
            $item = $pub['Title'];
            // Link to the publication page when the pub_id is known.
            if (array_key_exists('pub_id', $pub)) {
              $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
            }
            $HTML_report .= "<li>$item</li>\n";
          }
          $HTML_report .= "</ol>\n";
        }
        $HTML_report .= "</html>";
        $site_email = variable_get('site_mail', '');
        $params = array(
          'message' => $HTML_report
        );
        drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
      }

      // if any of the importers wanted to create contacts from the authors then sync them
      if ($do_contact) {
        print "Syncing contacts with Drupal...\n";
        chado_node_sync_records('contact');
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Imports all publications for a given publication import setup.
   *
   * The import setup is read from the tripal_pub_import table. Records are
   * requested from the remote database in pages of 100 and handed to
   * tripal_pub_add_publications(). The whole run is wrapped in a database
   * transaction which is rolled back if an exception is thrown, including the
   * case where no import setup exists for the given ID.
   *
   * @param $import_id
   *   The ID of the import setup to use
   * @param $job_id
   *   The jobs management job_id for the job if this function is run as a job.
   *
   * @ingroup tripal_pub
   */
  function tripal_execute_pub_importer($import_id, $job_id = NULL) {
    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";

    // start the transaction
    $transaction = db_transaction();

    try {
      $page = 0;
      $num_to_retrieve = 100;

      // get the requested loader
      $args = array(':import_id' => $import_id);
      $sql = "SELECT * FROM {tripal_pub_import} WHERE pub_import_id = :import_id ";
      $import = db_query($sql, $args)->fetchObject();

      // fetchObject() returns FALSE when there is no matching row. Fail
      // loudly rather than running an importer with no criteria.
      if (!$import) {
        throw new Exception("Cannot find a publication importer with ID '$import_id'.");
      }

      print "Executing Importer: '" . $import->name . "'\n";

      $criteria = unserialize($import->criteria);
      $remote_db = $criteria['remote_db'];

      do {
        // retrieve the pubs for this page. We'll retrieve 100 at a time
        $results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
        $pubs = $results['pubs'];
        tripal_pub_add_publications($pubs, $import->do_contact);
        $page++;
      }
      // continue looping until we have a $pubs array that does not have
      // our requested number of records. This means we've hit the end
      while (count($pubs) == $num_to_retrieve);

      // sync the newly added publications with Drupal.
      print "Syncing publications with Drupal...\n";
      chado_node_sync_records('pub');

      // if the importer wanted to create contacts from the authors then sync them
      if ($import->do_contact) {
        print "Syncing contacts with Drupal...\n";
        chado_node_sync_records('contact');
      }

      // Only report progress when actually running as a job.
      if ($job_id !== NULL) {
        tripal_set_job_progress($job_id, '100');
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Retrieves a single publication from one of the supported remote databases.
   *
   * The database name is split from the accession at the first colon, checked
   * against the 'tripal_pub_supported_dbs' variable and then searched for by ID
   * using tripal_get_remote_pubs(). The first publication of the result is
   * returned.
   *
   * @param $dbxref
   *   The unique database ID for the record to retrieve. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database.
   *
   * @return
   *   Returns the publication array or FALSE if the dbxref is malformed, the
   *   database is not supported or no publication was found.
   *
   * @ingroup tripal_pub
   */
  function tripal_get_remote_pub($dbxref) {
    if (!preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
      return FALSE;
    }
    $remote_db = $matches[1];
    $accession = $matches[2];

    // check that the database is supported
    $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
    if (!in_array($remote_db, $supported_dbs)) {
      return FALSE;
    }

    $search = array(
      'num_criteria' => 1,
      'remote_db' => $remote_db,
      'criteria' => array(
        '1' => array(
          'search_terms' => "$remote_db:$accession",
          'scope' => 'id',
          'operation' => '',
          'is_phrase' => 0,
        ),
      ),
    );
    $pubs = tripal_get_remote_pubs($remote_db, $search, 1, 0);

    // The search may come back empty (unknown accession or no search callback
    // for this database), in which case there is no first element to return.
    if (!isset($pubs['pubs'][0])) {
      return FALSE;
    }
    return $pubs['pubs'][0];
  }
  /**
   * Queries a remote publication database and returns the matching publications.
   *
   * The search is delegated to a function named
   * tripal_pub_remote_search_[remote_db]. If no such function exists an empty
   * result set is returned.
   *
   * @param $remote_db
   *   The name of the remote publication database to query. These names should
   *   match the name of the databases in the Chado 'db' table. Currently
   *   supported databases include
   *     'PMID':  PubMed
   *
   * @param $search_array
   *   An associative array containing the search criteria. The following keys
   *   are expected
   *     'remote_db':    Specifies the name of the remote publication database
   *     'num_criteria': Specifies the number of criteria present in the search array
   *     'days':         The number of days to include in the search starting from today
   *     'criteria':     An associative array containing the search criteria. There should
   *                     be no less than 'num_criteria' elements in this array.
   *
   *   The following keys are expected in the 'criteria' array
   *     'search_terms': A list of terms to search on, separated by spaces.
   *     'scope':        The fields to search in the remote database. Valid values
   *                     include: 'title', 'abstract', 'author' and 'any'
   *     'operation':    The logical operation to use for this criteria. Valid
   *                     values include: 'AND', 'OR' and 'NOT'.
   * @param $num_to_retrieve
   *   The number of records to retrieve. In cases with large numbers of
   *   records to retrieve, the remote database may limit the size of each
   *   retrieval.
   * @param $page
   *   Optional. If this function is called where the
   *   page for the pager cannot be set using the $_GET variable, use this
   *   argument to specify the page to retrieve.
   *
   * @return
   *   Whatever the search function returns. When no search function exists
   *   for the database this is an array with the keys 'total_records' (0),
   *   'search_str' (an empty string) and 'pubs' (an empty array).
   *
   * @ingroup tripal_pub
   */
  function tripal_get_remote_pubs($remote_db, $search_array, $num_to_retrieve, $page = 0) {
    // Each remote database supplies its own search function.
    $search_function = "tripal_pub_remote_search_$remote_db";

    // Without a search function there is nothing to query: hand back an
    // empty result set.
    if (!function_exists($search_function)) {
      return array(
        'total_records' => 0,
        'search_str' => '',
        'pubs' => array(),
      );
    }

    return call_user_func($search_function, $search_array, $num_to_retrieve, $page);
  }
  /**
   * Imports a single publication specified by a remote database cross reference.
   *
   * The publication is searched for by ID in the remote database using
   * tripal_get_remote_pubs() and the result is handed to
   * tripal_pub_add_publications(). The work is wrapped in a database
   * transaction which is rolled back if an exception is thrown.
   *
   * If $pub_dbxref is not of the form DB_NAME:ACCESSION nothing is imported,
   * but the sync with Drupal is still performed.
   *
   * @param $pub_dbxref
   *   The unique database ID for the record to update. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database.
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado.
   * @param $do_update
   *   If set to TRUE then the publication will be updated if it already exists
   *   in the database. Defaults to FALSE so that the optional $do_contact
   *   argument is not followed by a required one.
   *
   * @ingroup tripal_pub
   */
  function tripal_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update = FALSE) {
    $num_to_retrieve = 1;
    $page = 0;

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      if (preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
        $dbname = $matches[1];
        $accession = $matches[2];

        // Search the remote database for exactly this ID.
        $criteria = array(
          'num_criteria' => 1,
          'remote_db' => $dbname,
          'criteria' => array(
            '1' => array(
              'search_terms' => "$dbname:$accession",
              'scope' => 'id',
              'operation' => '',
              'is_phrase' => 0,
            ),
          ),
        );
        $results = tripal_get_remote_pubs($dbname, $criteria, $num_to_retrieve, $page);
        tripal_pub_add_publications($results['pubs'], $do_contact, $do_update);
      }

      // sync the newly added publications with Drupal
      print "Syncing publications with Drupal...\n";
      chado_node_sync_records('pub');

      // if the caller wants to create contacts from the authors then sync them
      if ($do_contact) {
        print "Syncing contacts with Drupal...\n";
        chado_node_sync_records('contact');
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Updates publication records that currently exist in the Chado pub table
   * with the most recent data in the remote database.
   *
   * Each publication that has a database cross reference is looked up again,
   * one record at a time, with tripal_get_remote_pubs() and the publications
   * found are handed to tripal_pub_add_publications() with updating enabled.
   * Cross references to databases that are not listed in the
   * 'tripal_pub_supported_dbs' variable are skipped. The work is wrapped in a
   * database transaction which is rolled back if an exception is thrown.
   *
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado. Contacts are added using the name provided by the remote
   *   database.
   * @param $dbxref
   *   The unique database ID for the record to update. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database.
   * @param $db
   *   The name of the remote database to update. If this value is provided and
   *   no dbxref then all of the publications currently in the Chado database
   *   for this remote database will be updated.
   *
   * @ingroup tripal_pub
   */
  function tripal_reimport_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {
    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      // get a list of all publications by their Dbxrefs that have supported databases
      $sql = "
        SELECT DB.name as db_name, DBX.accession
        FROM {pub} P
          INNER JOIN {pub_dbxref} PDBX ON P.pub_id = PDBX.pub_id
          INNER JOIN {dbxref} DBX ON DBX.dbxref_id = PDBX.dbxref_id
          INNER JOIN {db} DB ON DB.db_id = DBX.db_id
      ";
      $args = array();
      if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
        // restrict the update to a single cross reference
        $sql .= " WHERE DBX.accession = :accession and DB.name = :dbname ";
        $args[':accession'] = $matches[2];
        $args[':dbname'] = $matches[1];
      }
      elseif ($db) {
        // restrict the update to a single remote database
        $sql .= " WHERE DB.name = :dbname ";
        $args[':dbname'] = $db;
      }
      $sql .= " ORDER BY DB.name, P.pub_id";
      $results = chado_query($sql, $args);

      // The list of supported databases does not change while we iterate.
      $supported_dbs = variable_get('tripal_pub_supported_dbs', array());

      // iterate through the pub IDs
      while ($pub = $results->fetchObject()) {
        $accession = $pub->accession;
        $remote_db = $pub->db_name;

        // here we need to only update publications for databases we support
        if (!in_array($remote_db, $supported_dbs)) {
          continue;
        }

        $search = array(
          'num_criteria' => 1,
          'remote_db' => $remote_db,
          'criteria' => array(
            '1' => array(
              'search_terms' => "$remote_db:$accession",
              'scope' => 'id',
              'operation' => '',
              'is_phrase' => 0,
            ),
          ),
        );
        $remote_results = tripal_get_remote_pubs($remote_db, $search, 1, 0);

        // Only the 'pubs' element holds publications; the result array also
        // carries 'total_records' and 'search_str' which must not be passed
        // on as if they were publications.
        $pubs = isset($remote_results['pubs']) ? $remote_results['pubs'] : array();
        tripal_pub_add_publications($pubs, $do_contact, TRUE);
      }

      // sync the newly added publications with Drupal
      print "Syncing publications with Drupal...\n";
      chado_node_sync_records('pub');

      // if the caller wants to create contacts then we should sync them
      if ($do_contact) {
        print "Syncing contacts with Drupal...\n";
        chado_node_sync_records('contact');
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }