tripal_pub.AGL.inc 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882
  1. <?php
  /**
   * Adjusts the remote publication search form for the AGL database.
   *
   * The 'days' element is relabelled so that it asks for a year, and the
   * 'journal' choice is removed from the scope options of every criteria row.
   *
   * @param $form
   *   The form array to adjust.
   * @param $form_state
   *   The form state (not used here).
   * @param $num_criteria
   *   The number of criteria rows present in the form; rows are numbered
   *   starting at 1.
   *
   * @return
   *   The adjusted form array.
   */
  function tripal_pub_remote_alter_form_AGL($form, $form_state, $num_criteria = 1) {
    // AGL has not been made to filter by "the last XX days", so the 'days'
    // element is repurposed to hold the year to query.
    $year_title = t('Year');
    $year_help = t('Please enter a year to limit records by the year they were published, created or modified in the database.');
    $form['themed_element']['days']['#title'] = $year_title;
    $form['themed_element']['days']['#description'] = $year_help;

    // The Journal Name filter doesn't seem to work, so drop it from each row.
    $row = 1;
    while ($row <= $num_criteria) {
      $scope_key = 'scope-' . $row;
      unset($form['themed_element']['criteria'][$row][$scope_key]['#options']['journal']);
      $row++;
    }

    return $form;
  }
  /**
   * Validates the AGL-specific values of the remote publication search form.
   *
   * Checks performed:
   *  - 'days' (used as a year for AGL) must be empty or exactly four digits.
   *  - every criteria row whose scope is 'id' must look like "AGL:<digits>".
   *  - only one 'id' row is allowed; the second and any further 'id' rows are
   *    flagged (rows with another scope are never flagged for this).
   *
   * Problems are reported through form_set_error().
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state; reads 'days', 'num_criteria', and for each row $i the
   *   values "search_terms-$i" and "scope-$i".
   *
   * @return
   *   The unchanged form array.
   */
  function tripal_pub_remote_validate_form_AGL($form, $form_state) {
    $days = trim($form_state['values']["days"]);
    $num_criteria = $form_state['values']['num_criteria'];

    if ($days and !preg_match('/^\d\d\d\d$/', $days)) {
      form_set_error("days", "Please enter a four digit year.");
    }

    $num_ids = 0;
    for ($i = 1; $i <= $num_criteria; $i++) {
      // Only accession ('id') rows have AGL-specific constraints.
      if ($form_state['values']["scope-$i"] != 'id') {
        continue;
      }
      $num_ids++;
      $search_terms = trim($form_state['values']["search_terms-$i"]);

      if (!preg_match('/^AGL:\d+$/', $search_terms)) {
        form_set_error("search_terms-$i", "The AGL accession must be a numeric value, prefixed with 'AGL:' (e.g. AGL:3890740).");
      }
      // Flag only the surplus accession rows themselves.
      if ($num_ids > 1) {
        form_set_error("search_terms-$i", "Unfortunately, the AGL importer can only support a single accession at a time. Please remove the others.");
      }
    }
    return $form;
  }
  /**
   * Builds the CCL query string for an AGL search from the search criteria.
   *
   * The search form lets the user enter several rows for the same field, but
   * the CCL string should contain a single clause per field (and a single
   * negated clause per field). Rows are therefore grouped by field: the first
   * row of a group supplies the operator that joins the group to the rest of
   * the query, and later rows are joined inside the group with their own
   * operator. Rows whose operation is 'not' go into a separate negated group
   * for that field; inside it the terms are joined with 'or' and the whole
   * group is prefixed with 'not'. Groups are emitted in order of first
   * appearance. Rows with a scope other than title, author, abstract, journal,
   * id or any are ignored.
   *
   * @param $search_array
   *   The search criteria: 'num_criteria', 'days' (a year for AGL) and
   *   'criteria', an array indexed from 1 whose rows hold 'search_terms',
   *   'scope', 'is_phrase' and 'operation'.
   *
   * @return
   *   The CCL query string.
   */
  function _tripal_pub_remote_search_AGL_build_ccl($search_array) {
    $num_criteria = $search_array['num_criteria'];
    $days = $search_array['days'];
    $known_fields = array('title', 'author', 'abstract', 'journal', 'id', 'any');

    // Keyed by "<field>" or "negate_<field>"; PHP keeps insertion order, which
    // gives the order of first appearance.
    $groups = array();

    for ($i = 1; $i <= $num_criteria; $i++) {
      $criterion = $search_array['criteria'][$i];
      $field = $criterion['scope'];
      if (!in_array($field, $known_fields, TRUE)) {
        continue;
      }

      $op = isset($criterion['operation']) ? $criterion['operation'] : '';
      if ($op) {
        $op = strtolower($op);
      }

      $search_terms = trim($criterion['search_terms']);
      if (empty($criterion['is_phrase'])) {
        // Not a phrase: make sure embedded AND / OR operators are lower-case.
        $search_terms = preg_replace('/ OR /', ' or ', $search_terms);
        $search_terms = preg_replace('/ AND /', ' and ', $search_terms);
      }
      else {
        // A phrase must be surrounded by quotes.
        $search_terms = "\"$search_terms\"";
      }
      if ($field == 'id') {
        // The accession is entered as "AGL:<number>"; only the number is sent.
        $search_terms = preg_replace('/AGL:([^\s]*)/', '$1', $search_terms);
      }

      $negated = ($op == 'not');
      if ($negated) {
        // not(a) and not(b) is expressed as not(a or b).
        $op = 'or';
      }
      $key = ($negated ? 'negate_' : '') . $field;

      if (!isset($groups[$key])) {
        $groups[$key] = array(
          'field' => $field,
          'negated' => $negated,
          'op' => $op,
          'terms' => "($search_terms) ",
        );
      }
      else {
        $groups[$key]['terms'] .= "$op ($search_terms) ";
      }
    }

    $ccl = '';
    foreach ($groups as $group) {
      $prefix = $group['negated'] ? 'not' : $group['op'];
      // 'any' is searched without a field qualifier.
      $qualifier = ($group['field'] == 'any') ? '' : $group['field'] . '=';
      $ccl .= "$prefix $qualifier(" . $group['terms'] . ") ";
    }

    // For AGL the 'days' form element was converted to represent the year.
    if ($days) {
      $ccl .= "and year=($days)";
    }

    // Remove any leading 'and' or 'or'.
    return preg_replace('/^\s*(and|or)/', '', $ccl);
  }

  /**
   * Runs a search against the AGL (NAL Article Citation Database) Z39.50 server.
   *
   * @param $search_array
   *   The search criteria (see _tripal_pub_remote_search_AGL_build_ccl()).
   * @param $num_to_retrieve
   *   The number of publications per page.
   * @param $page
   *   The zero-based page number; the first record fetched is
   *   $page * $num_to_retrieve.
   *
   * @return
   *   An array with the keys 'total_records', 'search_str' and 'pubs'. When
   *   the CCL string cannot be parsed an error is reported and an empty result
   *   (0 records, empty search string) is returned.
   */
  function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page) {
    $ccl = _tripal_pub_remote_search_AGL_build_ccl($search_array);

    // yaz_connect() only prepares a connection; it is established later when
    // yaz_wait() is called.
    //$yazc = yaz_connect('agricola.nal.usda.gov:7090/voyager'); // NAL Catalog
    $yazc = yaz_connect('agricola.nal.usda.gov:7190/voyager'); // NAL Article Citation Database

    // Use the USMARC record type. OPAC is also supported by Agricola.
    yaz_syntax($yazc, "usmarc");

    // Map the CCL field names to Bib-1 use attributes.
    // Attribute set used by AGL: http://agricola.nal.usda.gov/help/z3950.html
    // More in depth details: http://www.loc.gov/z3950/agency/bib1.html
    // CCL syntax: http://www.indexdata.com/yaz/doc/tools.html#CCL
    $fields = array(
      "title" => "u=4",
      "author" => "u=1003",
      "abstract" => "u=62",
      "id" => "u=12",
      "year" => "u=30 r=o",
      "journal" => "u=1033"
    );
    yaz_ccl_conf($yazc, $fields);

    if (!yaz_ccl_parse($yazc, $ccl, $cclresult)) {
      drupal_set_message('Error parsing search string: ' . $cclresult["errorstring"], "error");
      watchdog('tpub_import', 'Error: %errstr', array('%errstr' => $cclresult["errorstring"]), WATCHDOG_ERROR);
      // Release the connection on the failure path as well.
      yaz_close($yazc);
      return array(
        'total_records' => 0,
        'search_str' => '',
        'pubs' => array(),
      );
    }
    $search_str = $cclresult["rpn"];

    // Get the total number of records, then the pubs in the requested range.
    $total_records = tripal_pub_AGL_count($yazc, $search_str);
    $start = $page * $num_to_retrieve;
    $results = tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records);

    yaz_close($yazc);
    return $results;
  }
  /**
   * Retrieves and parses one page of publications from an AGL result set.
   *
   * @param $yazc
   *   The yaz connection on which the search was run.
   * @param $search_str
   *   The search string; it is only echoed back in the result.
   * @param $start
   *   Zero-based offset of the first record wanted.
   * @param $num_to_retrieve
   *   The number of records wanted; clipped so that the range never runs past
   *   $total_records.
   * @param $total_records
   *   The total number of hits for the search.
   *
   * @return
   *   An array with the keys 'total_records', 'search_str' and 'pubs' (the
   *   parsed publications). If yaz_present() fails an error is reported and
   *   'pubs' is empty.
   */
  function tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records) {
    $result = array(
      'total_records' => $total_records,
      'search_str' => $search_str,
      'pubs' => array(),
    );

    yaz_range($yazc, 1, $total_records);
    if (!yaz_present($yazc)) {
      $error_no = yaz_errno($yazc);
      $error_msg = yaz_error($yazc);
      $additional = yaz_addinfo($yazc);
      if ($additional != $error_msg) {
        $error_msg .= " $additional";
      }
      drupal_set_message("ERROR waiting on search at AGL: ($error_no) $error_msg", "error");
      watchdog('tpub_import', "ERROR waiting on search at AGL: (%error_no) %error_msg",
        array('%error_no' => $error_no, '%error_msg' => $error_msg), WATCHDOG_ERROR);
      return $result;
    }

    // Offset just past the last record wanted, never beyond the result set.
    $end = $start + $num_to_retrieve;
    if ($end > $total_records) {
      $end = $total_records;
    }

    // yaz record positions are 1-based, hence the range ($start, $end].
    for ($position = $start + 1; $position <= $end; $position++) {
      $record_xml = yaz_record($yazc, $position, 'xml; charset=marc-8,utf-8');
      $result['pubs'][] = tripal_pub_AGL_parse_pubxml($record_xml);
    }
    return $result;
  }
  /**
   * Runs the prepared search on the AGL server and returns the number of hits.
   *
   * @param $yazc
   *   The yaz connection to search on.
   * @param $search_str
   *   The search string, in RPN form.
   *
   * @return
   *   The number of records that match, or 0 when the search could not be
   *   prepared or waiting for the server failed (an error is reported in
   *   either case).
   */
  function tripal_pub_AGL_count($yazc, $search_str) {
    //yaz_sort($yazc, "1=31 id"); // sort by publication date descending

    if (!yaz_search($yazc, "rpn", $search_str)) {
      $failure = 'ERROR preparing search at AGL';
    }
    elseif (!yaz_wait()) {
      $failure = 'ERROR waiting on search at AGL';
    }
    else {
      // The search completed; report the total number of results.
      return yaz_hits($yazc);
    }

    // Both failure paths report the yaz error in the same way.
    $error_no = yaz_errno($yazc);
    $error_msg = yaz_error($yazc);
    $additional = yaz_addinfo($yazc);
    if ($additional != $error_msg) {
      $error_msg .= " $additional";
    }
    drupal_set_message("$failure: ($error_no) $error_msg", "error");
    watchdog('tpub_import', "$failure: (%error_no) %error_msg",
      array('%error_no' => $error_no, '%error_msg' => $error_msg), WATCHDOG_ERROR);
    return 0;
  }
  /**
   * Builds a corporate ("collective") author entry from MARC subfields.
   *
   * @param $codes
   *   Subfield values keyed by subfield code. 'a' is the corporate name and
   *   'b' the subordinate unit.
   *
   * @return
   *   An array with a 'Collective' key, or an empty array when neither
   *   subfield is present.
   */
  function _tripal_pub_AGL_collective_author($codes) {
    $author = array();
    if (isset($codes['a'])) {
      $author['Collective'] = $codes['a'];
    }
    if (isset($codes['b'])) {
      $author['Collective'] = (isset($author['Collective']) ? $author['Collective'] : '') . ' ' . $codes['b'];
    }
    return $author;
  }

  /**
   * Parses one MARCXML record from AGL into a publication array.
   *
   * Description of the MARC bibliographic format:
   * http://www.loc.gov/marc/bibliographic/bdsummary.html
   *
   * Control fields 001 and 008 and data fields 035, 040, 100, 110, 245, 260,
   * 300, 500, 520, 650, 653, 700, 710, 773 and 856 are used; all other fields
   * are ignored.
   *
   * @param $pub_xml
   *   The XML of a single record.
   *
   * @return
   *   The publication array. 'Publication Type' always exists and defaults to
   *   'Journal Article' (the NAL data has no article type, so 'Conference
   *   Proceedings' is inferred from a host item name containing
   *   "Proceedings"). For empty input only that key is returned. Otherwise
   *   'Authors', 'Citation' and 'raw' are always set, and the remaining keys
   *   (e.g. 'Title', 'Year', 'Publication Date', 'Journal Name', 'Volume',
   *   'Issue', 'Pages', 'Abstract', 'Keywords', 'Author List',
   *   'Publication Dbxref', 'URL') are set when the record supplies them.
   */
  function tripal_pub_AGL_parse_pubxml($pub_xml) {
    $pub = array();
    $pub['Publication Type'][0] = 'Journal Article';
    if (!$pub_xml) {
      return $pub;
    }

    $month_names = array(
      '01' => 'Jan', '02' => 'Feb', '03' => 'Mar',
      '04' => 'Apr', '05' => 'May', '06' => 'Jun',
      '07' => 'Jul', '08' => 'Aug', '09' => 'Sep',
      '10' => 'Oct', '11' => 'Nov', '12' => 'Dec'
    );

    // Read the XML and iterate through it.
    $xml = new XMLReader();
    $xml->xml(trim($pub_xml));
    while ($xml->read()) {
      if ($xml->nodeType != XMLReader::ELEMENT) {
        continue;
      }
      $element = $xml->name;

      if ($element == 'controlfield') {
        $tag = $xml->getAttribute('tag');
        // Advance to the text node holding the field value.
        $xml->read();
        $value = $xml->value;
        switch ($tag) {
          case '001': // control number
            $pub['Publication Accession'] = $value;
            break;
          case '008': // fixed length data elements
            $date1 = substr($value, 7, 4);  // year of publication
            $date2 = substr($value, 11, 4); // month and day of publication (MMDD)
            $place = substr($value, 15, 3);
            $lang = substr($value, 35, 3);
            if (preg_match('/\d\d\d\d/', $date1)) {
              $pub['Year'] = $date1;
              $pub['Publication Date'] = $date1;
              // Only extend the date when the first two digits are a real
              // month; the day occupies the last two digits.
              $month_num = substr($date2, 0, 2);
              if (isset($month_names[$month_num])) {
                $pub['Publication Date'] .= ' ' . $month_names[$month_num];
                $day = substr($date2, 2, 2);
                if (preg_match('/^\d\d$/', $day) and $day != '00') {
                  $pub['Publication Date'] .= ' ' . $day;
                }
              }
            }
            if (!preg_match('/\s+/', $place)) {
              $pub['Published Location'] = $place;
            }
            if (!preg_match('/\s+/', $lang)) {
              $pub['Language Abbr'] = $lang;
            }
            break;
        }
      }
      elseif ($element == 'datafield') {
        $tag = $xml->getAttribute('tag');
        $ind1 = $xml->getAttribute('ind1');
        // switch compares loosely, so '035' also matches a tag given as '35'.
        switch ($tag) {
          case '035': // System Control Number
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Publication Accession'] = $codes['a'];
            }
            break;
          case '040': // Cataloging Source (original cataloging agency)
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Publication Database'] = $codes['a'];
            }
            break;
          case '100': // main entry-personal name
          case '700': // Added Entry-Personal Name
            $pub['Author List'][] = tripal_pub_remote_search_AGL_get_author($xml, $ind1);
            break;
          case '110': // main entry-corporate name
          case '710': // Added Entry-Corporate Name
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            $pub['Author List'][] = _tripal_pub_AGL_collective_author($codes);
            break;
          case '245': // title statement
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Title'] = trim(preg_replace('/\.$/', '', $codes['a']));
            }
            if (isset($codes['b'])) {
              $pub['Title'] = (isset($pub['Title']) ? $pub['Title'] : '') . ' ' . $codes['b'];
            }
            if (isset($codes['h'])) {
              $pub['Publication Model'] = $codes['h'];
            }
            break;
          case '260': // publication, distribution, etc (imprint)
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Published Location'] = $codes['a'];
            }
            if (isset($codes['b'])) {
              $pub['Publisher'] = $codes['b'];
            }
            if (isset($codes['c'])) {
              $pub['Publication Date'] = $codes['c'];
            }
            break;
          case '300': // physical description
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pages = preg_replace('/^p\. /', '', $codes['a']);
              $pages = preg_replace('/\.$/', '', $pages);
              // A value ending in 'p' is the number of pages, not the page
              // numbers, so it is skipped.
              if (!preg_match('/p$/', $pages)) {
                $pub['Pages'] = $pages;
              }
            }
            break;
          case '500': // general note
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Notes'] = $codes['a'];
            }
            break;
          case '520': // Summary, etc
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Abstract'] = $codes['a'];
            }
            break;
          case '650': // Subject Added Entry-Topical Term
          case '653': // Index Term-Uncontrolled
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Keywords'][] = $codes['a'];
            }
            break;
          case '773': // host item entry
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            // Processed in document order: 'a' and 't' can both set the
            // journal name and the later one wins.
            foreach ($codes as $code => $value) {
              switch ($code) {
                case 'a':
                  if (preg_match('/Proceedings/i', $value)) {
                    $pub['Series Name'] = preg_replace('/\.$/', '', $value);
                    $pub['Publication Type'][0] = 'Conference Proceedings';
                  }
                  else {
                    $pub['Journal Name'] = preg_replace('/\.$/', '', $value);
                  }
                  break;
                case 't':
                  if (preg_match('/Proceedings/i', $value)) {
                    $pub['Series Name'] = preg_replace('/\.$/', '', $value);
                    $pub['Publication Type'][0] = 'Conference Proceedings';
                  }
                  $pub['Journal Name'] = preg_replace('/\.$/', '', $value);
                  break;
                case 'g':
                  // Free text holding the date, volume and issue.
                  $matches = array();
                  if (preg_match('/^(\d\d\d\d)/', $value, $matches)) {
                    $pub['Publication Date'] = $matches[1];
                  }
                  elseif (preg_match('/(.*?)(\.|\s+)\s*(\d+),\s(\d\d\d\d)/', $value, $matches)) {
                    $pub['Publication Date'] = $matches[4] . ' ' . $matches[1] . ' ' . $matches[3];
                  }
                  elseif (preg_match('/\((.*?)(\.|\s+)(\d\d\d\d)\)/', $value, $matches)) {
                    $pub['Publication Date'] = $matches[3] . ' ' . $matches[1];
                  }
                  elseif (preg_match('/^(.*?) (\d\d\d\d)/', $value, $matches)) {
                    $pub['Publication Date'] = $matches[2] . ' ' . $matches[1];
                  }
                  if (preg_match('/v\. (.*?)(,|\s+)/', $value, $matches)) {
                    $pub['Volume'] = $matches[1];
                  }
                  if (preg_match('/v\. (.*?)(,|\s+)\((.*?)\)/', $value, $matches)) {
                    $pub['Volume'] = $matches[1];
                    $pub['Issue'] = $matches[3];
                  }
                  if (preg_match('/no\. (.*?)(\s|$)/', $value, $matches)) {
                    $pub['Issue'] = $matches[1];
                  }
                  break;
                case 'p':
                  $pub['Journal Abbreviation'] = $value;
                  break;
                case 'z':
                  $pub['ISBN'] = $value;
                  break;
              }
            }
            break;
          case '856': // Electronic Location and Access
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['u'])) {
              $pub['URL'] = $codes['u'];
            }
            break;
        }
      }
    }
    $xml->close();

    // Build the Dbxref.
    if (!empty($pub['Publication Accession']) and !empty($pub['Publication Database'])) {
      $pub['Publication Dbxref'] = $pub['Publication Database'] . ":" . $pub['Publication Accession'];
      unset($pub['Publication Accession']);
      unset($pub['Publication Database']);
    }

    // Build the full authors list.
    if (isset($pub['Author List']) and is_array($pub['Author List'])) {
      $names = array();
      foreach ($pub['Author List'] as $author) {
        if (array_key_exists('valid', $author) and $author['valid'] == 'N') {
          // Skip non-valid entries. A non-valid entry should have a
          // corresponding corrected entry so it can safely be skipped.
          continue;
        }
        if (array_key_exists('Collective', $author)) {
          $names[] = $author['Collective'];
        }
        elseif (array_key_exists('Surname', $author)) {
          $name = $author['Surname'];
          if (array_key_exists('First Initials', $author)) {
            $name .= ' ' . $author['First Initials'];
          }
          $names[] = $name;
        }
      }
      $pub['Authors'] = implode(', ', $names);
    }
    else {
      // No author fields in the record; the key is kept with a NULL value.
      $pub['Authors'] = NULL;
    }

    // Build the citation.
    $pub['Citation'] = tripal_pub_create_citation($pub);
    $pub['raw'] = $pub_xml;
    return $pub;
  }
  /**
   * Collects the subfields of the datafield the XML reader is positioned in.
   *
   * The reader is advanced until the closing tag of the datafield (or the end
   * of the document) is reached.
   *
   * @param $xml
   *   An XMLReader positioned on a datafield element.
   *
   * @return
   *   The subfield values keyed by their 'code' attribute. When a code occurs
   *   more than once the last value is kept.
   */
  function tripal_pub_remote_search_AGL_get_subfield($xml) {
    $subfields = array();

    while ($xml->read()) {
      $node_type = $xml->nodeType;
      $node_name = $xml->name;

      // The closing datafield tag ends the collection.
      if ($node_type == XMLReader::END_ELEMENT and $node_name == 'datafield') {
        break;
      }
      // Anything other than the start of a subfield is of no interest.
      if ($node_type != XMLReader::ELEMENT or $node_name != 'subfield') {
        continue;
      }

      $subfield_code = $xml->getAttribute('code');
      // Step onto the node that follows the opening tag to get the value.
      $xml->read();
      $subfields[$subfield_code] = $xml->value;
    }

    return $subfields;
  }
  /**
   * Builds an author entry from a MARC personal-name datafield.
   *
   * Only subfield 'a' (the name) is used, after a trailing comma is removed.
   * The first indicator selects how the name is read:
   *  - 0: the value is a forename and is stored as 'Given Name'.
   *  - 1: the value is "Surname, Given names"; 'Surname', 'Given Name' and
   *       'First Initials' (first character of each given name) are stored.
   *  - anything else (e.g. 3, a family name): no name parts are stored.
   *
   * @param $xml
   *   An XMLReader positioned on the datafield element; it is advanced to the
   *   end of that datafield.
   * @param $ind1
   *   The value of the datafield's first indicator.
   *
   * @return
   *   The author array; empty when the name could not be read.
   */
  function tripal_pub_remote_search_AGL_get_author($xml, $ind1) {
    $author = array();
    $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
    if (!isset($codes['a'])) {
      return $author;
    }

    // Remove any trailing comma.
    $value = preg_replace('/,$/', '', $codes['a']);

    // Compare as strings so a blank or missing indicator never counts as 0.
    $indicator = (string) $ind1;

    if ($indicator === '0') {
      // Given name is first.
      $author['Given Name'] = $value;
    }
    elseif ($indicator === '1') {
      // Surname is first; the parts of the name are separated by commas.
      $names = explode(',', $value);
      $author['Surname'] = array_shift($names);
      $author['Given Name'] = trim(implode(' ', array_map('trim', $names)));

      $author['First Initials'] = '';
      $given_names = preg_split('/\s+/', $author['Given Name'], -1, PREG_SPLIT_NO_EMPTY);
      foreach ($given_names as $name) {
        // Take the first character rather than the first byte so multi-byte
        // UTF-8 letters stay intact; fall back to the byte if the text is not
        // valid UTF-8.
        $first = array();
        $author['First Initials'] .= preg_match('/^./us', $name, $first) ? $first[0] : substr($name, 0, 1);
      }
    }

    return $author;
  }