tripal_pub.AGL.inc 30 KB


  1. <?php
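  /**
   * @file
   * Remote publication importer for the USDA National Agricultural Library
   * (AGL / Agricola) article citation database. Searches are sent over Z39.50
   * using the PHP YAZ extension and the returned MARC XML records are parsed
   * into Tripal publication arrays.
   */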
  /**
   * Adjusts the remote publication search form for the AGL importer.
   *
   * The 'days' element is relabelled so that it asks for a publication year,
   * and the 'journal' choice is removed from the scope selector of every
   * criteria row.
   *
   * @param $form
   *   The form (drupal form api) to adjust.
   * @param $form_state
   *   The form state. Not used by this function.
   * @param $num_criteria
   *   The number of criteria rows present on the form. Rows are numbered from 1.
   *
   * @return
   *   The form (drupal form api)
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_remote_alter_form_AGL($form, $form_state, $num_criteria = 1) {
    // So far AGL could not be made to filter results by "the last XX days",
    // so the 'days' element is re-purposed to hold the year to query.
    $year_properties = array(
      '#title' => t('Year'),
      '#description' => t('Please enter a year to limit records by the year they were published, created or modified in the database.'),
    );
    foreach ($year_properties as $property => $text) {
      $form['themed_element']['days'][$property] = $text;
    }

    // The Journal Name filter doesn't seem to work, so drop it from every row.
    $row = 1;
    while ($row <= $num_criteria) {
      unset($form['themed_element']['criteria'][$row]["scope-$row"]['#options']['journal']);
      $row++;
    }

    return $form;
  }
  /**
   * Validates the remote publication search form for the AGL importer.
   *
   * Checks that the year (held in the 'days' element) is four digits, that
   * every criteria row searching by accession uses the 'AGL:<digits>' format,
   * and that at most one row searches by accession.
   *
   * @param $form
   *   The form (drupal form api). Returned unchanged.
   * @param $form_state
   *   The form state. Reads 'days', 'num_criteria' and, for each row i,
   *   "search_terms-i" and "scope-i" from $form_state['values'].
   *
   * @return
   *   The form (drupal form api)
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_remote_validate_form_AGL($form, $form_state) {
    $days = trim($form_state['values']["days"]);
    $num_criteria = $form_state['values']['num_criteria'];

    if ($days and !preg_match('/^\d\d\d\d$/', $days)) {
      form_set_error("days", "Please enter a four digit year.");
    }

    $num_ids = 0;
    for ($i = 1; $i <= $num_criteria; $i++) {
      $search_terms = trim($form_state['values']["search_terms-$i"]);
      $scope = $form_state['values']["scope-$i"];

      // Only accession rows need checking. Skipping the others here also keeps
      // the "single accession" error from being attached to unrelated rows
      // that merely follow a second accession row.
      if ($scope != 'id') {
        continue;
      }

      if (!preg_match('/^AGL:\d+$/', $search_terms)) {
        form_set_error("search_terms-$i", "The AGL accession must be a numeric value, prefixed with 'AGL:' (e.g. AGL:3890740).");
      }

      $num_ids++;
      if ($num_ids > 1) {
        form_set_error("search_terms-$i", "Unfortunately, the AGL importer can only support a single accession at a time. Please remove the others.");
      }
    }

    return $form;
  }
  /**
   * Runs a publication search against the AGL Z39.50 service.
   *
   * The search criteria are turned into a CCL query string, parsed by YAZ into
   * an RPN query, and then used to count the hits and fetch one page of records.
   *
   * @param $search_array
   *   The search settings. Reads 'num_criteria', 'days' (used as a year) and,
   *   for each row i starting at 1, $search_array['criteria'][i] with the keys
   *   'search_terms', 'scope', 'is_phrase' and 'operation'.
   * @param $num_to_retrieve
   *   The number of records per page.
   * @param $page
   *   The zero-based page number; the first record fetched is
   *   $page * $num_to_retrieve.
   *
   * @return
   *   An array with the keys 'total_records', 'search_str' and 'pubs'. If the
   *   CCL string cannot be parsed an error is reported and an empty result
   *   (0 records, empty search string, no pubs) is returned.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page) {
    $ccl = _tripal_pub_AGL_build_ccl($search_array);

    // yaz_connect() prepares for a connection to a Z39.50 server. This function
    // is non-blocking and does not attempt to establish a connection - it merely
    // prepares a connect to be performed later when yaz_wait() is called.
    //$yazc = yaz_connect('agricola.nal.usda.gov:7090/voyager'); // NAL Catalog
    $yazc = yaz_connect('agricola.nal.usda.gov:7190/voyager'); // NAL Article Citation Database

    // use the USMARC record type. But OPAC is also supported by Agricola
    yaz_syntax($yazc, "usmarc");

    // The search query is built using CCL, so it must first be configured to
    // map the qualifiers used in the query to Z39.50 attributes.
    // The attribute set used by AGL can be found at the bottom of this page:
    // http://agricola.nal.usda.gov/help/z3950.html
    //
    // More in depth details: http://www.loc.gov/z3950/agency/bib1.html
    //
    // CCL Syntax: http://www.indexdata.com/yaz/doc/tools.html#CCL
    $fields = array(
      "title" => "u=4",
      "author" => "u=1003",
      "abstract" => "u=62",
      "id" => "u=12",
      "year" => "u=30 r=o",
      "journal" => "u=1033"
    );
    yaz_ccl_conf($yazc, $fields);

    if (!yaz_ccl_parse($yazc, $ccl, $cclresult)) {
      drupal_set_message('Error parsing search string: ' . $cclresult["errorstring"], "error");
      watchdog('tpub_import', 'Error: %errstr', array('%errstr' => $cclresult["errorstring"]), WATCHDOG_ERROR);
      // Release the connection on the failure path as well.
      yaz_close($yazc);
      return array(
        'total_records' => 0,
        'search_str' => '',
        'pubs' => array(),
      );
    }
    $search_str = $cclresult["rpn"];

    // get the total number of records
    $total_records = tripal_pub_AGL_count($yazc, $search_str);

    // get the pubs in the specified range
    $start = $page * $num_to_retrieve;
    $results = tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records);

    // close the connection
    yaz_close($yazc);

    return $results;
  }

  /**
   * Builds the CCL query string for an AGL search from the search criteria.
   *
   * The search form allows several rows of the same scope, but the CCL string
   * should contain a single clause per scope (and a single negated clause per
   * scope). Rows are therefore grouped by scope; the first row of a group
   * carries no operator inside the group, and its operator is instead placed
   * in front of the whole group. Groups are emitted in the order in which their
   * first row appears.
   *
   * @param $search_array
   *   The search settings, as passed to tripal_pub_remote_search_AGL().
   *
   * @return
   *   The CCL query string.
   */
  function _tripal_pub_AGL_build_ccl($search_array) {
    $num_criteria = $search_array['num_criteria'];
    $days = $search_array['days'];

    // Maps every supported scope to the CCL qualifier placed before its group.
    // 'any' searches without a qualifier.
    $qualifiers = array();
    foreach (array('title', 'author', 'abstract', 'journal', 'id', 'any') as $field) {
      $qualifier = ($field == 'any') ? '' : "$field=";
      $qualifiers[$field] = $qualifier;
      $qualifiers["negate_$field"] = $qualifier;
    }

    // Scope => accumulated expression, in order of first appearance.
    $groups = array();
    // Scope => operator of the first row seen for that scope.
    $leading_ops = array();

    for ($i = 1; $i <= $num_criteria; $i++) {
      $criterion = $search_array['criteria'][$i];
      $search_terms = trim($criterion['search_terms']);
      $scope = $criterion['scope'];
      $op = $criterion['operation'];
      if ($op) {
        $op = strtolower($op);
      }

      if (!$criterion['is_phrase']) {
        // not a phrase: make sure the AND and OR are lower-case
        $search_terms = preg_replace('/ OR /', ' or ', $search_terms);
        $search_terms = preg_replace('/ AND /', ' and ', $search_terms);
      }
      else {
        // a phrase: surround the search terms with quotes
        $search_terms = "\"$search_terms\"";
      }

      // A 'not' row joins the negated group of its scope. Inside that group the
      // rows are combined with 'or', because the group as a whole is emitted as
      // "not qualifier=(a or b)".
      if ($op == 'not') {
        $scope = "negate_$scope";
        $op = 'or';
      }

      // Rows with a scope this importer does not know are ignored.
      if (!isset($qualifiers[$scope])) {
        continue;
      }

      // Accessions are entered as 'AGL:12345' but searched by number only.
      if ($scope == 'id' or $scope == 'negate_id') {
        $search_terms = preg_replace('/AGL:([^\s]*)/', '$1', $search_terms);
      }

      if (!isset($groups[$scope])) {
        $groups[$scope] = "($search_terms) ";
        $leading_ops[$scope] = $op;
      }
      else {
        $groups[$scope] .= "$op ($search_terms) ";
      }
    }

    // now build the CCL string in order
    $ccl = '';
    foreach ($groups as $scope => $expression) {
      $lead = (strpos($scope, 'negate_') === 0) ? 'not' : $leading_ops[$scope];
      $ccl .= "$lead " . $qualifiers[$scope] . "($expression) ";
    }

    // for AGL the 'days' form element was converted to represent the year
    if ($days) {
      $ccl .= "and year=($days)";
    }

    // remove any preceding 'and' or 'or'
    return preg_replace('/^\s*(and|or)/', '', $ccl);
  }
  /**
   * Fetches one page of publications from a prepared AGL search.
   *
   * @param $yazc
   *   The YAZ connection on which the search was run.
   * @param $search_str
   *   The search string; only echoed back in the result.
   * @param $start
   *   Zero-based offset of the first record to return.
   * @param $num_to_retrieve
   *   Maximum number of records to return. Fewer are returned when the result
   *   set ends first.
   * @param $total_records
   *   Total number of hits for the search.
   *
   * @return
   *   An array with the keys 'total_records', 'search_str' and 'pubs'. 'pubs'
   *   holds one parsed publication per record and is empty if presenting the
   *   records failed.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records) {
    $parsed_pubs = array();

    // NOTE(review): the requested range spans the whole result set rather than
    // just the page being returned - confirm whether it can be narrowed.
    yaz_range($yazc, 1, $total_records);

    if (yaz_present($yazc)) {
      // Never read past the last record of the result set.
      $stop = $start + $num_to_retrieve;
      if ($stop > $total_records) {
        $stop = $total_records;
      }
      // Record positions are 1-based, offsets are 0-based.
      for ($position = $start + 1; $position <= $stop; $position++) {
        $record_xml = yaz_record($yazc, $position, 'xml; charset=marc-8,utf-8');
        $parsed_pubs[] = tripal_pub_AGL_parse_pubxml($record_xml);
      }
    }
    else {
      $error_no = yaz_errno($yazc);
      $error_msg = yaz_error($yazc);
      $extra_info = yaz_addinfo($yazc);
      if ($extra_info != $error_msg) {
        $error_msg .= " $extra_info";
      }
      drupal_set_message("ERROR waiting on search at AGL: ($error_no) $error_msg", "error");
      watchdog('tpub_import', "ERROR waiting on search at AGL: (%error_no) %error_msg",
        array('%error_no' => $error_no, '%error_msg' => $error_msg), WATCHDOG_ERROR);
    }

    return array(
      'total_records' => $total_records,
      'search_str' => $search_str,
      'pubs' => $parsed_pubs,
    );
  }
  /**
   * Runs the search on the AGL server and returns the number of hits.
   *
   * This function is used as the callback function when used with the
   * tripal_pager_callback function.
   *
   * @param $yazc
   *   The YAZ connection to search on.
   * @param $search_str
   *   The search query, in RPN notation.
   *
   * @return
   *   A count of the dataset to be paged, or 0 if the search could not be
   *   prepared or waiting for it failed (the error is reported to the user
   *   and logged).
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_AGL_count($yazc, $search_str) {
    //yaz_sort($yazc, "1=31 id"); // sort by publication date descending

    // Work out which step failed, if any. yaz_wait() is only attempted when
    // the search itself could be prepared.
    $failed_step = '';
    if (!yaz_search($yazc, "rpn", $search_str)) {
      $failed_step = 'search';
    }
    elseif (!yaz_wait()) {
      $failed_step = 'wait';
    }

    if ($failed_step == '') {
      // get the total number of results from the search
      return yaz_hits($yazc);
    }

    $error_no = yaz_errno($yazc);
    $error_msg = yaz_error($yazc);
    $extra_info = yaz_addinfo($yazc);
    if ($extra_info != $error_msg) {
      $error_msg .= " $extra_info";
    }
    $log_args = array('%error_no' => $error_no, '%error_msg' => $error_msg);

    if ($failed_step == 'search') {
      drupal_set_message("ERROR preparing search at AGL: ($error_no) $error_msg", "error");
      watchdog('tpub_import', "ERROR preparing search at AGL: (%error_no) %error_msg",
        $log_args, WATCHDOG_ERROR);
    }
    else {
      drupal_set_message("ERROR waiting on search at AGL: ($error_no) $error_msg", "error");
      watchdog('tpub_import', "ERROR waiting on search at AGL: (%error_no) %error_msg",
        $log_args, WATCHDOG_ERROR);
    }
    return 0;
  }
  /**
   * Parse publication XML for a single publication
   *
   * Description of XML format:
   * http://www.loc.gov/marc/bibliographic/bdsummary.html
   *
   * The record is read as a stream of <controlfield> and <datafield> elements.
   * Field tags are matched with a loose switch, so a tag with leading zeros
   * (e.g. "035") matches its numeric form. Subfield codes are looked up by key
   * or compared as strings.
   *
   * @param $pub_xml
   *   A string containing the XML for a single publication
   *
   * @return
   *   An array containing the details of the publication. It always contains
   *   'Publication Type'. When $pub_xml is not empty it also contains
   *   'Authors', 'Citation' and 'raw' (the unparsed XML), plus whichever other
   *   fields were found in the record.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_AGL_parse_pubxml($pub_xml) {
    $pub = array();

    // we will set the default publication type as a journal article. The NAL
    // dataset doesn't specify an article type so we'll have to glean the type
    // from other information (e.g. series name has 'Proceedings' in it)
    $pub['Publication Type'][0] = 'Journal Article';
    if (!$pub_xml) {
      return $pub;
    }

    $month_names = array(
      '01' => 'Jan', '02' => 'Feb', '03' => 'Mar',
      '04' => 'Apr', '05' => 'May', '06' => 'Jun',
      '07' => 'Jul', '08' => 'Aug', '09' => 'Sep',
      '10' => 'Oct', '11' => 'Nov', '12' => 'Dec'
    );

    // Datafields this parser does not interpret. Only a debugging aid.
    $unhandled = array();

    // read the XML and iterate through it.
    $xml = new XMLReader();
    $xml->xml(trim($pub_xml));
    while ($xml->read()) {
      if ($xml->nodeType != XMLReader::ELEMENT) {
        continue;
      }

      if ($xml->name == 'controlfield') {
        $tag = $xml->getAttribute('tag');
        // A self-closing element has no text node to step onto.
        $value = '';
        if (!$xml->isEmptyElement) {
          $xml->read();
          $value = $xml->value;
        }
        switch ($tag) {
          case '001': // control number
            $pub['Publication Accession'] = $value;
            break;

          case '008': // fixed length data elements
            $date1 = (string) substr($value, 7, 4);  // year of publication
            $date2 = (string) substr($value, 11, 4); // month and day of publication
            $place = (string) substr($value, 15, 3);
            $lang = (string) substr($value, 35, 3);
            if (preg_match('/^\d{4}$/', $date1)) {
              $pub['Year'] = $date1;
              $pub['Publication Date'] = $date1;
              // Only extend the date when the first two digits are a real
              // month; the second pair, when present, is taken as the day.
              $parts = array();
              if (preg_match('/^(\d\d)(\d\d)?/', $date2, $parts) and isset($month_names[$parts[1]])) {
                $pub['Publication Date'] .= ' ' . $month_names[$parts[1]];
                if (isset($parts[2])) {
                  $pub['Publication Date'] .= ' ' . $parts[2];
                }
              }
            }
            // Only keep codes that are completely filled in.
            if (preg_match('/^\S+$/', $place)) {
              $pub['Published Location'] = $place;
            }
            if (preg_match('/^\S+$/', $lang)) {
              $pub['Language Abbr'] = $lang;
            }
            break;

          case '003': // control number identifier
          case '005': // date and time of latest transaction
          case '006': // fixed-length data elements
          case '007': // physical description fixed field
          default:    // unhandled tag
            break;
        }
      }
      elseif ($xml->name == 'datafield') {
        $tag = $xml->getAttribute('tag');
        $ind1 = $xml->getAttribute('ind1');
        switch ($tag) {
          case '035': // System Control Number
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Publication Accession'] = $codes['a'];
            }
            // Without this break the next case would read subfields a second
            // time and consume the datafield that follows this one.
            break;

          case '040': // Cataloging Source (NR)
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) { // original cataloging agency
              $pub['Publication Database'] = $codes['a'];
            }
            break;

          case '100': // main entry-personal name
          case '700': // Added Entry-Personal Name
            $pub['Author List'][] = tripal_pub_remote_search_AGL_get_author($xml, $ind1);
            break;

          case '110': // main entry-corporate name
          case '710': // Added Entry-Corporate Name
            $author = array();
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) { // Corporate name or jurisdiction name as entry element
              $author['Collective'] = $codes['a'];
            }
            if (isset($codes['b'])) { // Subordinate unit
              $author['Collective'] = isset($author['Collective']) ? $author['Collective'] . ' ' . $codes['b'] : $codes['b'];
            }
            $pub['Author List'][] = $author;
            break;

          case '245': // title statement
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Title'] = trim(preg_replace('/\.$/', '', $codes['a']));
            }
            if (isset($codes['b'])) {
              $pub['Title'] = isset($pub['Title']) ? $pub['Title'] . ' ' . $codes['b'] : $codes['b'];
            }
            if (isset($codes['h'])) {
              $pub['Publication Model'] = $codes['h'];
            }
            break;

          case '260': // publication, distribution, etc (imprint)
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Published Location'] = $codes['a'];
            }
            if (isset($codes['b'])) {
              $pub['Publisher'] = $codes['b'];
            }
            if (isset($codes['c'])) {
              $pub['Publication Date'] = $codes['c'];
            }
            break;

          case '300': // physical description
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pages = preg_replace('/^p\. /', '', $codes['a']);
              $pages = preg_replace('/\.$/', '', $pages);
              // A value ending in 'p' is the number of pages, not the page
              // numbers, so it is skipped.
              if (!preg_match('/p$/', $pages)) {
                $pub['Pages'] = $pages;
              }
            }
            break;

          case '500': // general note
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Notes'] = $codes['a'];
            }
            break;

          case '520': // Summary, etc
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Abstract'] = $codes['a'];
            }
            break;

          case '650': // Subject Added Entry-Topical Term
          case '653': // Index Term-Uncontrolled
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Keywords'][] = $codes['a'];
            }
            break;

          case '773': // host item entry
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            // Subfields are applied in document order because 'a' and 't' can
            // both set the journal name. The code is cast to a string so that
            // a numeric code is never loosely equal to a letter.
            foreach ($codes as $code => $value) {
              switch ((string) $code) {
                case 'a':
                  if (preg_match('/Proceedings/i', $value)) {
                    $pub['Series Name'] = preg_replace('/\.$/', '', $value);
                    $pub['Publication Type'][0] = 'Conference Proceedings';
                  }
                  else {
                    $pub['Journal Name'] = preg_replace('/\.$/', '', $value);
                  }
                  break;

                case 't':
                  if (preg_match('/Proceedings/i', $value)) {
                    $pub['Series Name'] = preg_replace('/\.$/', '', $value);
                    $pub['Publication Type'][0] = 'Conference Proceedings';
                  }
                  $pub['Journal Name'] = preg_replace('/\.$/', '', $value);
                  break;

                case 'g':
                  $matches = array();
                  if (preg_match('/^(\d\d\d\d)/', $value, $matches)) {
                    $pub['Publication Date'] = $matches[1];
                  }
                  elseif (preg_match('/(.*?)(\.|\s+)\s*(\d+),\s(\d\d\d\d)/', $value, $matches)) {
                    $year = $matches[4];
                    $month = $matches[1];
                    $day = $matches[3];
                    $pub['Publication Date'] = "$year $month $day";
                  }
                  elseif (preg_match('/\((.*?)(\.|\s+)(\d\d\d\d)\)/', $value, $matches)) {
                    $year = $matches[3];
                    $month = $matches[1];
                    $pub['Publication Date'] = "$year $month";
                  }
                  elseif (preg_match('/^(.*?) (\d\d\d\d)/', $value, $matches)) {
                    $year = $matches[2];
                    $month = $matches[1];
                    $pub['Publication Date'] = "$year $month";
                  }
                  if (preg_match('/v\. (.*?)(,|\s+)/', $value, $matches)) {
                    $pub['Volume'] = $matches[1];
                  }
                  if (preg_match('/v\. (.*?)(,|\s+)\((.*?)\)/', $value, $matches)) {
                    $pub['Volume'] = $matches[1];
                    $pub['Issue'] = $matches[3];
                  }
                  if (preg_match('/no\. (.*?)(\s|$)/', $value, $matches)) {
                    $pub['Issue'] = $matches[1];
                  }
                  break;

                case 'p':
                  $pub['Journal Abbreviation'] = $value;
                  break;

                case 'z':
                  $pub['ISBN'] = $value;
                  break;
              }
            }
            break;

          case '856': // Electronic Location and Access
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['u'])) {
              $pub['URL'] = $codes['u'];
            }
            break;

          // Known fields that are deliberately not imported.
          case '016': // National Bibliographic Agency Control Number
          case '072': // Subject Category Code
          case '111': // main entry-meeting name
          case '130': // main entry-uniform title
          case '210': // abbreviated title
          case '222': // key title
          case '240': // uniform title
          case '242': // translation of title by cataloging agency
          case '243': // collective uniform title
          case '246': // varying form of title
          case '247': // former title
          case '250': // edition statement
          case '254': // musical presentation statement
          case '255': // cartographic mathematical data
          case '256': // computer file characteristics
          case '257': // country of producing entity
          case '258': // philatelic issue data
          case '263': // projected publication date
          case '264': // production, publication, distribution, manufacture and copyright notice
          case '270': // Address
          case '504': // Bibliography, Etc. Note
          case '852': // Location (Where is the publication held)
            break;

          default:
            $unhandled[$tag][] = tripal_pub_remote_search_AGL_get_subfield($xml);
            break;
        }
      }
    }
    $xml->close();
    //dpm($unhandled);

    // build the Dbxref
    if (!empty($pub['Publication Accession']) and !empty($pub['Publication Database'])) {
      $pub['Publication Dbxref'] = $pub['Publication Database'] . ":" . $pub['Publication Accession'];
      unset($pub['Publication Accession']);
      unset($pub['Publication Database']);
    }

    // build the full authors list
    if (isset($pub['Author List']) and is_array($pub['Author List'])) {
      $names = array();
      foreach ($pub['Author List'] as $author) {
        if (array_key_exists('valid', $author) and $author['valid'] == 'N') {
          // skip non-valid entries. A non-valid entry should have
          // a corresponding corrected entry so we can safely skip it.
          continue;
        }
        if (array_key_exists('Collective', $author)) {
          $names[] = $author['Collective'];
        }
        elseif (array_key_exists('Surname', $author)) {
          $name = $author['Surname'];
          if (array_key_exists('First Initials', $author)) {
            $name .= ' ' . $author['First Initials'];
          }
          $names[] = $name;
        }
      }
      $pub['Authors'] = implode(', ', $names);
    }
    else {
      // No author fields were found in the record.
      $pub['Authors'] = NULL;
    }

    // build the citation
    $pub['Citation'] = tripal_pub_create_citation($pub);
    $pub['raw'] = $pub_xml;

    return $pub;
  }
  /**
   * Collects the subfields of the datafield the reader is positioned on.
   *
   * Reading continues until the closing tag of the datafield (or the end of
   * the document), so on return the reader sits on that closing tag.
   *
   * @param $xml
   *   An XMLReader positioned on a <datafield> element.
   *
   * @return
   *   An array of subfield values keyed by subfield code. If a code occurs more
   *   than once only its last value is kept. A subfield without content yields
   *   an empty string.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_remote_search_AGL_get_subfield($xml) {
    $codes = array();

    // A self-closing <datafield/> has no closing tag of its own. Reading on
    // would run into, and consume, the datafield that follows it.
    if ($xml->nodeType == XMLReader::ELEMENT and $xml->name == 'datafield' and $xml->isEmptyElement) {
      return $codes;
    }

    while ($xml->read()) {
      $sub_element = $xml->name;

      // when we've reached the end of the datafield element we are done
      if ($xml->nodeType == XMLReader::END_ELEMENT and $sub_element == 'datafield') {
        return $codes;
      }

      // if inside the subfield element then get the code and its text
      if ($xml->nodeType == XMLReader::ELEMENT and $sub_element == 'subfield') {
        $code = $xml->getAttribute('code');
        $value = '';
        // Only step onto the text node when there is one. Stepping forward
        // from a self-closing <subfield/> would skip whatever node follows,
        // including the closing tag of the datafield.
        if (!$xml->isEmptyElement) {
          $xml->read();
          $value = $xml->value;
        }
        $codes[$code] = $value;
      }
    }

    return $codes;
  }
  /**
   * Builds an author array from a personal-name datafield.
   *
   * Only subfield 'a' (the name) is used. How it is split depends on the first
   * indicator of the datafield:
   *   - 0: the value is stored as-is under 'Given Name'.
   *   - 1: the value is split on commas; the first part becomes 'Surname', the
   *        rest 'Given Name', and 'First Initials' holds the first character
   *        of each word of the given name.
   *   - anything else (e.g. 3, a family name): nothing is extracted.
   *
   * @param $xml
   *   An XMLReader positioned on the <datafield> element of the name.
   * @param $ind1
   *   The value of the 'ind1' attribute of that datafield.
   *
   * @return
   *   An array that may contain 'Given Name', 'Surname' and 'First Initials'.
   *   It is empty when there is no subfield 'a' or the indicator is not
   *   handled.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_remote_search_AGL_get_author($xml, $ind1) {
    $author = array();

    $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
    if (!isset($codes['a'])) {
      return $author;
    }

    // remove any trailing commas
    $value = preg_replace('/,$/', '', $codes['a']);

    // Compare the indicator as text: a blank or missing indicator must not be
    // treated as 0.
    $name_type = trim((string) $ind1);

    if ($name_type === '0') { // Given Name is first
      $author['Given Name'] = $value;
    }
    elseif ($name_type === '1') { // Surname is first
      // split the parts of the name using a comma
      $parts = explode(',', $value);
      $author['Surname'] = array_shift($parts);

      $given_words = preg_split('/\s+/', implode(' ', $parts), -1, PREG_SPLIT_NO_EMPTY);
      $author['Given Name'] = implode(' ', $given_words);

      $author['First Initials'] = '';
      foreach ($given_words as $word) {
        // Take a whole character, not a single byte, where mbstring allows it.
        $author['First Initials'] .= function_exists('mb_substr') ? mb_substr($word, 0, 1, 'UTF-8') : substr($word, 0, 1);
      }
    }

    return $author;
  }