AGL.inc 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817
  1. <?php
  /**
   * Adjusts the remote publication search form for the AGL database.
   *
   * So far it has not been possible to get AGL to restrict results to
   * publications from the last N days, so the form's 'days' element is
   * relabelled and used to hold the year to query instead.
   *
   * @param $form
   *   The form array, modified in place.
   * @param $form_state
   *   The form state (not used here).
   */
  function tripal_pub_remote_alter_form_AGL(&$form, $form_state) {
    // Properties of the 'days' element that are replaced so it reads as a year.
    $year_properties = array(
      '#title' => t('Year'),
      '#description' => t('Please enter a year to limit records by the year they were published, created or modified in the database.'),
    );
    foreach ($year_properties as $property => $text) {
      $form['days'][$property] = $text;
    }
  }
  /**
   * Validates the AGL-specific parts of the remote publication search form.
   *
   * Checks that the 'days' element (which holds a year for AGL) is a four
   * digit year, that every criterion with the 'id' scope is a well formed AGL
   * accession, and that at most one accession is supplied.
   *
   * @param $form
   *   The form array. $form['num_criteria']['#default_value'] is read as the
   *   number of criteria rows to check.
   * @param $form_state
   *   The form state. The submitted 'days', 'scope-N' and 'search_terms-N'
   *   values are read from $form_state['values'].
   */
  function tripal_pub_remote_validate_form_AGL(&$form, $form_state) {
    $days = trim($form_state['values']["days"]);
    $num_criteria = $form['num_criteria']['#default_value'];

    if ($days and !preg_match('/^\d\d\d\d$/', $days)) {
      form_set_error("days", "Please enter a four digit year.");
    }

    $num_ids = 0;
    for ($i = 1; $i <= $num_criteria; $i++) {
      // Only accession (id) criteria need AGL-specific validation.
      if ($form_state['values']["scope-$i"] != 'id') {
        continue;
      }
      $search_terms = trim($form_state['values']["search_terms-$i"]);
      if (!preg_match('/^AGL:\d+$/', $search_terms)) {
        form_set_error("search_terms-$i", "The AGL accession must be a numeric value, prefixed with 'AGL:' (e.g. AGL:3890740).");
      }
      // Flag only the surplus accession rows themselves, never the unrelated
      // criteria that happen to follow them.
      $num_ids++;
      if ($num_ids > 1) {
        form_set_error("search_terms-$i", "Unfortunately, the AGL importer can only support a single accession at a time. Please remove the others.");
      }
    }
  }
  /**
   * Searches the AGL (NAL Article Citation Database) Z39.50 service.
   *
   * The search criteria are converted to a CCL query, parsed by YAZ into an
   * RPN search string, and the paged results are fetched through
   * tripal_pager_callback() using tripal_pub_AGL_range() and
   * tripal_pub_AGL_count() as callbacks. The YAZ connection is shared with
   * those callbacks through $_SESSION for the duration of the call.
   *
   * @param $search_array
   *   The search settings. Keys read here: 'num_criteria', 'days' (holds a
   *   year for AGL) and 'criteria', a 1-indexed list whose entries each have
   *   'search_terms', 'scope', 'is_phrase' and 'operation'.
   * @param $num_to_retrieve
   *   The number of publications per page.
   * @param $pager_id
   *   The pager identifier passed on to tripal_pager_callback().
   *
   * @return
   *   Whatever tripal_pager_callback() returns for the search, or an empty
   *   array when the CCL query cannot be parsed.
   */
  function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $pager_id) {
    $search_array['limit'] = $num_to_retrieve;

    $ccl = _tripal_pub_AGL_build_ccl($search_array);

    // yaz_connect() prepares for a connection to a Z39.50 server. This function
    // is non-blocking and does not attempt to establish a connection - it merely
    // prepares a connect to be performed later when yaz_wait() is called.
    // Port 7190 is the NAL Article Citation Database; the NAL Catalog is
    // available at agricola.nal.usda.gov:7090/voyager.
    $yazc = yaz_connect('agricola.nal.usda.gov:7190/voyager');

    // Use the USMARC record type. OPAC is also supported by Agricola.
    yaz_syntax($yazc, "usmarc");

    // The search query is built using CCL, so the CCL field names must first be
    // mapped to the attributes understood by the server.
    // The attribute set used by AGL can be found at the bottom of this page:
    // http://agricola.nal.usda.gov/help/z3950.html
    // More in depth details: http://www.loc.gov/z3950/agency/bib1.html
    // CCL Syntax: http://www.indexdata.com/yaz/doc/tools.html#CCL
    $fields = array(
      "title" => "u=4",
      "author" => "u=1003",
      "abstract" => "u=62",
      "id" => "u=12",
      "year" => "u=30 r=o",
    );
    yaz_ccl_conf($yazc, $fields);

    // yaz_ccl_parse() declares its result argument by reference; passing it
    // with a call-time '&' is a fatal error on PHP 5.4 and later.
    $cclresult = array();
    if (!yaz_ccl_parse($yazc, $ccl, $cclresult)) {
      drupal_set_message('Error parsing search string: ' . $cclresult["errorstring"], "error");
      watchdog('tripal_pub', 'Error: %errstr', array('%errstr' => $cclresult["errorstring"]), WATCHDOG_ERROR);
      // Do not leave the prepared connection open on the error path.
      yaz_close($yazc);
      return array();
    }
    $search_str = $cclresult["rpn"];
    $search_array['search_string'] = $search_str;

    // Save the YAZ connection in the session for use by the pager callbacks.
    $_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection'] = $yazc;

    // Get the list of pubs using the search terms but with a Drupal style pager.
    $pubs = tripal_pager_callback('tripal_pub_AGL_range', $num_to_retrieve, $pager_id,
      'tripal_pub_AGL_count', $search_array);

    // Close the connection.
    unset($_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection']);
    yaz_close($yazc);

    return $pubs;
  }

  /**
   * Builds the CCL query string for an AGL search.
   *
   * The search form allows several rows of the same scope, but the CCL string
   * needs a single entry per scope ('title', 'author', 'abstract', 'id' or
   * 'any') plus a single negated entry per scope. Rows are therefore grouped by
   * scope: the operation of the first row of a group joins the group to the
   * rest of the query, while the operation of later rows joins terms inside the
   * group. Groups are emitted in the order in which they first appear. Rows
   * with any other scope are ignored.
   *
   * @param $search_array
   *   The search settings (see tripal_pub_remote_search_AGL()).
   *
   * @return
   *   The CCL query string.
   */
  function _tripal_pub_AGL_build_ccl($search_array) {
    $num_criteria = $search_array['num_criteria'];
    $days = $search_array['days'];
    $known_scopes = array('title', 'author', 'abstract', 'id', 'any');

    // Keyed by scope (or "negate_<scope>"), in order of first appearance.
    $groups = array();
    for ($i = 1; $i <= $num_criteria; $i++) {
      $criterion = $search_array['criteria'][$i];
      $search_terms = trim($criterion['search_terms']);
      $scope = $criterion['scope'];
      $op = strtolower((string) $criterion['operation']);

      if (!$criterion['is_phrase']) {
        // Not a phrase: make sure embedded AND and OR are lower-case.
        $search_terms = preg_replace('/ OR /', ' or ', $search_terms);
        $search_terms = preg_replace('/ AND /', ' and ', $search_terms);
      }
      else {
        // A phrase: surround the search terms with quotes.
        $search_terms = "\"$search_terms\"";
      }

      if (!in_array($scope, $known_scopes, TRUE)) {
        continue;
      }

      // Negated rows of one scope are collected into a single 'not' group and
      // joined with 'or': NOT (a OR b) excludes records matching either term.
      $negated = ($op == 'not');
      if ($negated) {
        $op = 'or';
      }
      if ($scope == 'id') {
        // The server expects the bare accession, without the 'AGL:' prefix.
        $search_terms = preg_replace('/AGL:([^\s]*)/', '$1', $search_terms);
      }

      $key = $negated ? "negate_$scope" : $scope;
      if (!isset($groups[$key])) {
        $groups[$key] = array(
          'field' => $scope,
          'negated' => $negated,
          'op' => $op,
          'text' => "($search_terms) ",
        );
      }
      else {
        $groups[$key]['text'] .= "$op ($search_terms) ";
      }
    }

    // Now build the CCL string in order.
    $ccl = '';
    foreach ($groups as $group) {
      // The 'any' scope searches without naming a field.
      $target = ($group['field'] == 'any') ? '' : $group['field'] . '=';
      $prefix = $group['negated'] ? 'not' : $group['op'];
      $ccl .= $prefix . ' ' . $target . '(' . $group['text'] . ') ';
    }

    // For AGL the 'days' form element was converted to represent the year.
    if ($days) {
      $ccl .= "and year=($days)";
    }

    // Remove any preceding 'and' or 'or'.
    return preg_replace('/^\s*(and|or)/', '', $ccl);
  }
  /**
   * Pager count callback for tripal_pager_callback().
   *
   * Runs the prepared search on the YAZ connection stored in the session and
   * returns the total number of matching records. The total is also stored in
   * $_SESSION['tripal_pub_AGL_query'][<search string>]['Count'], where
   * tripal_pub_AGL_range() reads it.
   *
   * @param $search_array
   *   The search settings; only 'search_string' (the RPN query) is read.
   *
   * @return
   *   The number of hits, or 0 when the search could not be performed (an
   *   error message is shown to the user in that case).
   */
  function tripal_pub_AGL_count($search_array) {
    $search_str = $search_array['search_string'];
    $yazc = $_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection'];
    //yaz_sort($yazc, "1=31 id"); // sort by publication date descending

    // yaz_search() only prepares the search; yaz_wait() performs it.
    $failed_step = NULL;
    if (!yaz_search($yazc, "rpn", $search_str)) {
      $failed_step = 'preparing';
    }
    elseif (!yaz_wait()) {
      $failed_step = 'waiting on';
    }

    if ($failed_step) {
      $error_no = yaz_errno($yazc);
      $error_msg = yaz_error($yazc);
      $additional = yaz_addinfo($yazc);
      // Only append the additional information when it adds something.
      if ($additional != $error_msg) {
        $error_msg .= " $additional";
      }
      drupal_set_message("ERROR $failed_step search at AGL: ($error_no) $error_msg", "error");
      return 0;
    }

    // Get the total number of results from the search.
    $count = yaz_hits($yazc);
    $_SESSION['tripal_pub_AGL_query'][$search_str]['Count'] = $count;
    return $count;
  }
  /**
   * Pager range callback for tripal_pager_callback().
   *
   * Retrieves the records of one page from the YAZ result set and parses each
   * of them with tripal_pub_AGL_parse_pubxml(). The YAZ connection and the
   * total hit count are read from the session, where
   * tripal_pub_remote_search_AGL() and tripal_pub_AGL_count() stored them.
   *
   * @param $search_array
   *   The search settings. 'search_string' is required; when 'limit' is set it
   *   takes precedence over the $limit argument.
   * @param $start
   *   Zero-based offset of the first record to return.
   * @param $limit
   *   Maximum number of records to return.
   *
   * @return
   *   An array of parsed publications; empty when the requested range lies
   *   outside the result set or the records could not be retrieved.
   */
  function tripal_pub_AGL_range($search_array, $start = 0, $limit = 10) {
    $pubs = array();
    $search_str = $search_array['search_string'];
    if (isset($search_array['limit'])) {
      $limit = $search_array['limit'];
    }
    $yazc = $_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection'];
    $count = $_SESSION['tripal_pub_AGL_query'][$search_str]['Count'];

    // Never ask for more records than the result set holds.
    if ($start + $limit > $count) {
      $limit = $count - $start;
    }
    if ($limit <= 0) {
      return $pubs;
    }

    // YAZ record positions are 1-based, the pager offset is 0-based.
    yaz_range($yazc, $start + 1, $limit);
    if (!yaz_present($yazc)) {
      $error_no = yaz_errno($yazc);
      $error_msg = yaz_error($yazc);
      $additional = yaz_addinfo($yazc);
      if ($additional != $error_msg) {
        $error_msg .= " $additional";
      }
      drupal_set_message("ERROR retrieving records from AGL: ($error_no) $error_msg", "error");
      return $pubs;
    }

    for ($i = $start; $i < $start + $limit; $i++) {
      $pub_xml = yaz_record($yazc, $i + 1, 'xml; charset=marc-8,utf-8');
      $pubs[] = tripal_pub_AGL_parse_pubxml($pub_xml);
    }
    return $pubs;
  }
  /**
   * Converts one MARCXML record returned by AGL into a publication array.
   *
   * Description of the record format:
   * http://www.loc.gov/marc/bibliographic/bdsummary.html
   *
   * Handled control fields: 001 (control number) and 008 (fixed-length data).
   * Handled data fields: 035, 040, 100, 110, 245, 260, 300, 500, 520, 650,
   * 653, 700, 710, 773 and 856. All other fields are ignored.
   *
   * @param $pub_xml
   *   The MARCXML of a single record.
   *
   * @return
   *   The publication array. 'Publication Type' is always set; when
   *   $pub_xml is empty nothing else is. Otherwise the array also contains the
   *   parsed fields plus 'Authors', 'Citation' (built by
   *   tripal_pub_create_citation()) and 'raw' (the unmodified XML).
   */
  function tripal_pub_AGL_parse_pubxml($pub_xml) {
    $pub = array();

    // Publications from the NAL Article Citation Database are all journal
    // articles.
    $pub['Publication Type'][] = 'Journal Article';
    if (!$pub_xml) {
      return $pub;
    }

    // Read the XML and iterate through it.
    $xml = new XMLReader();
    $xml->xml(trim($pub_xml));
    while ($xml->read()) {
      if ($xml->nodeType != XMLReader::ELEMENT) {
        continue;
      }

      if ($xml->name == 'controlfield') {
        $tag = $xml->getAttribute('tag');
        // readString() does not move the reader, so an empty control field
        // cannot make the loop skip past the following element.
        $value = $xml->isEmptyElement ? '' : $xml->readString();
        switch ($tag) {
          case '001': // control number
            $pub['Publication Accession'] = $value;
            break;

          case '008': // fixed length data elements
            $pub = array_merge($pub, _tripal_pub_AGL_parse_fixed_field($value));
            break;

          default:
            // Not used: 003 (control number identifier), 005 (date and time of
            // latest transaction), 006 (fixed-length data elements) and
            // 007 (physical description fixed field).
            break;
        }
      }
      elseif ($xml->name == 'datafield') {
        $tag = $xml->getAttribute('tag');
        $ind1 = $xml->getAttribute('ind1');
        switch ($tag) {
          case '035': // System Control Number
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Publication Accession'] = $codes['a'];
            }
            break;

          case '040': // Cataloging Source (NR)
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              // Original cataloging agency.
              $pub['Publication Database'] = $codes['a'];
            }
            break;

          case '100': // Main Entry-Personal Name
          case '700': // Added Entry-Personal Name
            $pub['Author List'][] = tripal_pub_remote_search_AGL_get_author($xml, $ind1);
            break;

          case '110': // Main Entry-Corporate Name
          case '710': // Added Entry-Corporate Name
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            $pub['Author List'][] = _tripal_pub_AGL_corporate_author($codes);
            break;

          case '245': // Title Statement
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Title'] = trim(preg_replace('/\.$/', '', $codes['a']));
            }
            if (isset($codes['b'])) {
              $pub['Title'] = (isset($pub['Title']) ? $pub['Title'] : '') . ' ' . $codes['b'];
            }
            if (isset($codes['h'])) {
              $pub['Publication Model'] = $codes['h'];
            }
            break;

          case '260': // Publication, Distribution, etc. (Imprint)
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Published Location'] = $codes['a'];
            }
            if (isset($codes['b'])) {
              $pub['Publisher'] = $codes['b'];
            }
            if (isset($codes['c'])) {
              $pub['Publication Date'] = $codes['c'];
            }
            break;

          case '300': // Physical Description
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pages = preg_replace('/^p\. /', '', $codes['a']);
              $pages = preg_replace('/\.$/', '', $pages);
              // A value ending in 'p' is the number of pages, not the page
              // numbers, so it is skipped.
              if (!preg_match('/p$/', $pages)) {
                $pub['Pages'] = $pages;
              }
            }
            break;

          case '500': // General Note
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Notes'] = $codes['a'];
            }
            break;

          case '520': // Summary, etc.
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Abstract'] = $codes['a'];
            }
            break;

          case '650': // Subject Added Entry-Topical Term
          case '653': // Index Term-Uncontrolled
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Keywords'][] = $codes['a'];
            }
            break;

          case '773': // Host Item Entry
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['t'])) {
              $pub['Journal Name'] = preg_replace('/\.$/', '', $codes['t']);
            }
            if (isset($codes['g'])) {
              $pub = array_merge($pub, _tripal_pub_AGL_parse_host_item_part($codes['g']));
            }
            if (isset($codes['p'])) {
              $pub['Journal Abbreviation'] = $codes['p'];
            }
            if (isset($codes['z'])) {
              $pub['ISBN'] = $codes['z'];
            }
            break;

          case '856': // Electronic Location and Access
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['u'])) {
              $pub['URL'] = $codes['u'];
            }
            break;

          default:
            // Known but unused fields include: 016 (national bibliographic
            // agency control number), 072 (subject category code), 111, 130,
            // 210, 222, 240, 242, 243, 246, 247 (meeting name and title
            // variants), 250, 254, 255, 256, 257, 258 (edition and
            // material-specific statements), 263, 264, 270 (projected date,
            // production notice, address), 504 (bibliography note) and
            // 852 (location). Their subfields are skipped by the main loop.
            break;
        }
      }
    }
    $xml->close();

    // Build the Dbxref.
    if (!empty($pub['Publication Accession']) and !empty($pub['Publication Database'])) {
      $pub['Publication Dbxref'] = $pub['Publication Database'] . ":" . $pub['Publication Accession'];
      unset($pub['Publication Accession']);
      unset($pub['Publication Database']);
    }

    // Build the full authors list.
    $names = array();
    $author_list = isset($pub['Author List']) ? $pub['Author List'] : array();
    foreach ($author_list as $author) {
      if (isset($author['valid']) and $author['valid'] == 'N') {
        // Skip non-valid entries. A non-valid entry should have a
        // corresponding corrected entry so it can safely be skipped.
        continue;
      }
      if (!empty($author['Collective'])) {
        $names[] = $author['Collective'];
      }
      elseif (!empty($author['Surname'])) {
        $initials = isset($author['First Initials']) ? $author['First Initials'] : '';
        $names[] = trim($author['Surname'] . ' ' . $initials);
      }
      elseif (!empty($author['Given Name'])) {
        // Names entered in direct order have no separate surname.
        $names[] = $author['Given Name'];
      }
    }
    $pub['Authors'] = implode(', ', $names);

    // Build the citation.
    $pub['Citation'] = tripal_pub_create_citation($pub);
    $pub['raw'] = $pub_xml;

    return $pub;
  }

  /**
   * Extracts publication details from a MARC 008 control field.
   *
   * @param $value
   *   The content of the 008 field.
   *
   * @return
   *   An array that may contain 'Year', 'Publication Date',
   *   'Published Location' and 'Language Abbr'.
   */
  function _tripal_pub_AGL_parse_fixed_field($value) {
    $fields = array();
    $month_names = array(
      '01' => 'Jan', '02' => 'Feb', '03' => 'Mar',
      '04' => 'Apr', '05' => 'May', '06' => 'Jun',
      '07' => 'Jul', '08' => 'Aug', '09' => 'Sep',
      '10' => 'Oct', '11' => 'Nov', '12' => 'Dec',
    );
    $date1 = (string) substr($value, 7, 4);  // year of publication
    $date2 = (string) substr($value, 11, 4); // month and day of publication
    $place = (string) substr($value, 15, 3);
    $lang = (string) substr($value, 35, 3);

    if (preg_match('/^\d{4}$/', $date1)) {
      $fields['Year'] = $date1;
      $fields['Publication Date'] = $date1;

      // Only extend the date when the second date really starts with a month;
      // it can also hold something else, such as another year.
      $month = (string) substr($date2, 0, 2);
      if (isset($month_names[$month])) {
        $date = $date1 . ' ' . $month_names[$month];
        $day = (string) substr($date2, 2, 2);
        if (preg_match('/^\d\d$/', $day)) {
          $date .= ' ' . $day;
        }
        $fields['Publication Date'] = $date;
      }
    }
    // Codes that are blank or padded with blanks are not used.
    if (preg_match('/^\S+$/', $place)) {
      $fields['Published Location'] = $place;
    }
    if (preg_match('/^\S+$/', $lang)) {
      $fields['Language Abbr'] = $lang;
    }
    return $fields;
  }

  /**
   * Extracts date, volume and issue from subfield g of a MARC 773 field.
   *
   * @param $value
   *   The content of subfield g.
   *
   * @return
   *   An array that may contain 'Publication Date', 'Volume' and 'Issue'.
   */
  function _tripal_pub_AGL_parse_host_item_part($value) {
    $fields = array();
    $matches = array();

    // The date appears in several layouts; the first matching one is used.
    if (preg_match('/^(\d\d\d\d)/', $value, $matches)) {
      $fields['Publication Date'] = $matches[1];
    }
    elseif (preg_match('/(.*?)(\.|\s+)\s*(\d+),\s(\d\d\d\d)/', $value, $matches)) {
      // <month> <day>, <year>
      $fields['Publication Date'] = $matches[4] . ' ' . $matches[1] . ' ' . $matches[3];
    }
    elseif (preg_match('/\((.*?)(\.|\s+)(\d\d\d\d)\)/', $value, $matches)) {
      // (<month> <year>)
      $fields['Publication Date'] = $matches[3] . ' ' . $matches[1];
    }
    elseif (preg_match('/^(.*?) (\d\d\d\d)/', $value, $matches)) {
      // <month> <year>
      $fields['Publication Date'] = $matches[2] . ' ' . $matches[1];
    }

    if (preg_match('/v\. (.*?)(,|\s+)/', $value, $matches)) {
      $fields['Volume'] = $matches[1];
    }
    if (preg_match('/v\. (.*?)(,|\s+)\((.*?)\)/', $value, $matches)) {
      $fields['Volume'] = $matches[1];
      $fields['Issue'] = $matches[3];
    }
    // An explicit issue number takes precedence over a parenthesised one.
    if (preg_match('/no\. (.*?)(\s|$)/', $value, $matches)) {
      $fields['Issue'] = $matches[1];
    }
    return $fields;
  }

  /**
   * Builds an author entry from the subfields of a corporate name field.
   *
   * @param $codes
   *   The subfields of a MARC 110 or 710 field, keyed by subfield code.
   *
   * @return
   *   An author array whose 'Collective' key holds the corporate name
   *   (subfield a) followed by the subordinate unit (subfield b); an empty
   *   array when neither subfield is present.
   */
  function _tripal_pub_AGL_corporate_author($codes) {
    $author = array();
    if (isset($codes['a'])) {
      $author['Collective'] = $codes['a'];
    }
    if (isset($codes['b'])) {
      $author['Collective'] = (isset($author['Collective']) ? $author['Collective'] : '') . ' ' . $codes['b'];
    }
    return $author;
  }
  /**
   * Collects the subfields of the MARCXML datafield the reader is positioned on.
   *
   * The reader is advanced to the closing tag of the datafield (or to the end
   * of the document when no closing tag is found).
   *
   * @param $xml
   *   An XMLReader positioned on a 'datafield' element.
   *
   * @return
   *   An array of subfield values keyed by subfield code. When a code occurs
   *   more than once the last value wins.
   */
  function tripal_pub_remote_search_AGL_get_subfield($xml) {
    $codes = array();

    // A self-closing datafield has no closing tag to stop at; reading on would
    // consume the subfields of the following datafield.
    if ($xml->nodeType == XMLReader::ELEMENT and $xml->name == 'datafield' and $xml->isEmptyElement) {
      return $codes;
    }

    while ($xml->read()) {
      $node_name = $xml->name;

      // Stop once the end of the datafield element has been reached.
      if ($xml->nodeType == XMLReader::END_ELEMENT and $node_name == 'datafield') {
        break;
      }

      // Inside a subfield element: record its code and text. readString() does
      // not move the reader, so an empty subfield cannot make the loop step
      // over the closing tag of the datafield.
      if ($xml->nodeType == XMLReader::ELEMENT and $node_name == 'subfield') {
        $code = $xml->getAttribute('code');
        $codes[$code] = $xml->isEmptyElement ? '' : $xml->readString();
      }
    }
    return $codes;
  }
  /**
   * Builds an author entry from a MARCXML personal name field.
   *
   * Only subfield a (the name) is used. How it is split depends on the first
   * indicator of the field:
   *  - 0: the given name comes first; the whole value is the given name.
   *  - 1: the surname comes first, followed by a comma and the given names.
   *  - anything else (e.g. 3, a family name): no name parts are extracted.
   *
   * @param $xml
   *   An XMLReader positioned on the 'datafield' element of the name.
   * @param $ind1
   *   The value of the field's 'ind1' attribute.
   *
   * @return
   *   An author array that may contain 'Surname', 'Given Name' and
   *   'First Initials'.
   */
  function tripal_pub_remote_search_AGL_get_author($xml, $ind1) {
    $author = array();
    $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
    if (!isset($codes['a'])) {
      return $author;
    }

    // Remove any trailing comma.
    $value = preg_replace('/,$/', '', $codes['a']);

    // Indicators are compared as strings so that a blank or missing indicator
    // is not mistaken for 0.
    $indicator = (string) $ind1;
    if ($indicator === '0') {
      // Given name is first.
      $author['Given Name'] = trim($value);
    }
    elseif ($indicator === '1') {
      // Surname is first: split the parts of the name using a comma.
      $parts = explode(',', $value);
      $author['Surname'] = array_shift($parts);

      $given_names = array();
      foreach ($parts as $part) {
        $part = trim($part);
        if ($part !== '') {
          $given_names[] = $part;
        }
      }
      $author['Given Name'] = implode(' ', $given_names);

      // The initials are the first character of every given name.
      $author['First Initials'] = '';
      foreach (preg_split('/\s+/', $author['Given Name'], -1, PREG_SPLIT_NO_EMPTY) as $name) {
        $first = array();
        // Take a whole UTF-8 character; fall back to the first byte when the
        // name is not valid UTF-8.
        if (preg_match('/^./us', $name, $first)) {
          $author['First Initials'] .= $first[0];
        }
        else {
          $author['First Initials'] .= substr($name, 0, 1);
        }
      }
    }
    return $author;
  }