AGL.inc 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802
  1. <?php
  /**
   * Relabels the 'days' element of the remote search form for AGL.
   *
   * AGL results could not be filtered by "number of days in the past", so the
   * same form element is presented to the user as a publication year instead.
   *
   * @param $form
   *   The form array, modified in place.
   * @param $form_state
   *   The form state (not used here).
   */
  function tripal_pub_remote_alter_form_AGL(&$form, $form_state) {
    $year_properties = array(
      '#title' => t('Year'),
      '#description' => t('Please enter a year to limit records by the year they were published, created or modified in the database.'),
    );
    foreach ($year_properties as $property => $text) {
      $form['days'][$property] = $text;
    }
  }
  /**
   * Validates the 'days' element of the remote search form for AGL.
   *
   * For AGL the element holds a year, so any non-empty value must consist of
   * exactly four digits; otherwise a form error is set on 'days'.
   *
   * @param $form
   *   The form array (not used here).
   * @param $form_state
   *   The form state; $form_state['values']['days'] is read.
   */
  function tripal_pub_remote_validate_form_AGL(&$form, $form_state) {
    $year = trim($form_state['values']["days"]);
    // an empty value means "no year filter" and is always acceptable
    if (!$year) {
      return;
    }
    if (preg_match('/^\d\d\d\d$/', $year)) {
      return;
    }
    form_set_error("days", "Please enter a four digit year.");
  }
  /**
   * Searches the NAL Article Citation Database (AGL) over Z39.50 using YAZ.
   *
   * The search criteria are converted to a CCL query, YAZ parses that query
   * into RPN, and the RPN string plus the open connection are handed to the
   * tripal_pub_AGL_count() / tripal_pub_AGL_range() callbacks through
   * tripal_pager_callback().
   *
   * @param $search_array
   *   The search settings. This function reads 'num_criteria', 'criteria'
   *   and 'days', and adds 'limit' and 'search_string' before passing the
   *   array on to the pager callbacks.
   * @param $num_to_retrieve
   *   The number of records per page; stored as $search_array['limit'].
   * @param $pager_id
   *   Passed through unchanged to tripal_pager_callback().
   *
   * @return
   *   The value returned by tripal_pager_callback(), or an empty array when
   *   the connection cannot be prepared or the CCL query cannot be parsed.
   */
  function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $pager_id) {
    // set some defaults
    $search_array['limit'] = $num_to_retrieve;

    $ccl = _tripal_pub_remote_search_AGL_build_ccl($search_array);

    // yaz_connect() prepares for a connection to a Z39.50 server. This function is non-blocking
    // and does not attempt to establish a connection - it merely prepares a connect to be
    // performed later when yaz_wait() is called.
    // The NAL Catalog is available at 'agricola.nal.usda.gov:7090/voyager'; the
    // address below is the NAL Article Citation Database.
    $yazc = yaz_connect('agricola.nal.usda.gov:7190/voyager');
    if (!$yazc) {
      drupal_set_message('ERROR preparing connection to AGL.', "error");
      return array();
    }

    // use the USMARC record type. But OPAC is also supported by Agricola
    yaz_syntax($yazc, "usmarc");

    // the search query is built using CCL, we need to first
    // configure it so it can map the attributes to defined identifiers
    // The attribute set used by AGL can be found at the bottom of this page:
    // http://agricola.nal.usda.gov/help/z3950.html
    //
    // More in depth details: http://www.loc.gov/z3950/agency/bib1.html
    //
    // CCL Syntax: http://www.indexdata.com/yaz/doc/tools.html#CCL
    //
    $fields = array(
      "title" => "u=4",
      "author" => "u=1003",
      "abstract" => "u=62",
      "id" => "u=12",
      "year" => "u=30 r=o",
    );
    yaz_ccl_conf($yazc, $fields);

    // yaz_ccl_parse() declares its third parameter by reference, so a plain
    // variable is passed (call-time '&' is a fatal error since PHP 5.4).
    $cclresult = array();
    if (!yaz_ccl_parse($yazc, $ccl, $cclresult)) {
      drupal_set_message('Error parsing search string: ' . $cclresult["errorstring"], "error");
      watchdog('tripal_pub', 'Error: %errstr', array('%errstr' => $cclresult["errorstring"]), WATCHDOG_ERROR);
      // do not leave the prepared connection open when bailing out
      yaz_close($yazc);
      return array();
    }
    $search_str = $cclresult["rpn"];
    $search_array['search_string'] = $search_str;

    // save the YAZ connection in the session for use by other functions
    $_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection'] = $yazc;

    // we want to get the list of pubs using the search terms but using a Drupal style pager
    $pubs = tripal_pager_callback('tripal_pub_AGL_range', $num_to_retrieve, $pager_id,
      'tripal_pub_AGL_count', $search_array);

    // close the connection
    unset($_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection']);
    yaz_close($yazc);

    return $pubs;
  }

  /**
   * Builds the CCL query string for an AGL search.
   *
   * The search form allows several rows with the same scope, but the CCL
   * string should contain a single entry per scope ('title', 'author',
   * 'abstract', 'id', 'any') and a single negated entry per scope. Rows are
   * therefore grouped by scope. Within a group the first row carries no
   * operator and later rows are joined with their own operator. The operator
   * of the first row of a group is pushed out and used to join the group to
   * the groups before it. Rows whose operator is 'not' go into the negated
   * group of their scope, joined with 'or', and the group is emitted with a
   * leading 'not'. Groups are emitted in order of first appearance. Rows
   * with any other scope are ignored.
   *
   * @param $search_array
   *   The search settings; 'num_criteria', 'criteria' (indexed from 1, each
   *   with 'search_terms', 'scope', 'is_phrase' and 'operation') and 'days'
   *   are read.
   *
   * @return
   *   The CCL query string with any leading 'and' / 'or' removed.
   */
  function _tripal_pub_remote_search_AGL_build_ccl($search_array) {
    // CCL qualifier written in front of each group; 'any' has no qualifier
    $qualifiers = array(
      'title' => 'title=',
      'author' => 'author=',
      'abstract' => 'abstract=',
      'id' => 'id=',
      'any' => '',
    );
    // group key => accumulated terms, kept in order of first appearance
    $groups = array();
    // group key => operator of the first row of that group
    $group_ops = array();

    $num_criteria = $search_array['num_criteria'];
    for ($i = 1; $i <= $num_criteria; $i++) {
      $criterion = $search_array['criteria'][$i];
      $search_terms = trim($criterion['search_terms']);
      $scope = (string) $criterion['scope'];
      $op = $criterion['operation'];
      if ($op) {
        $op = strtolower($op);
      }

      // if this is not a phrase then make sure the AND and OR are lower-case
      if (!$criterion['is_phrase']) {
        $search_terms = preg_replace('/ OR /', ' or ', $search_terms);
        $search_terms = preg_replace('/ AND /', ' and ', $search_terms);
      }
      // else make sure the search terms are surrounded by quotes
      else {
        $search_terms = "\"$search_terms\"";
      }

      // a 'not' row is collected in the negated group of its scope, where
      // the rows are joined with 'or'
      if ($op === 'not') {
        $scope = "negate_$scope";
        $op = 'or';
      }

      $base_scope = (strpos($scope, 'negate_') === 0) ? substr($scope, 7) : $scope;
      if (!isset($qualifiers[$base_scope])) {
        continue;
      }
      if ($base_scope == 'id') {
        $search_terms = preg_replace('/AGL:([^\s]*)/', 'id=($1)', $search_terms);
      }

      if (!isset($groups[$scope])) {
        $groups[$scope] = "($search_terms) ";
        $group_ops[$scope] = $op;
      }
      else {
        $groups[$scope] .= "$op ($search_terms) ";
      }
    }

    // now build the CCL string in order, including the very first group
    $ccl = '';
    foreach ($groups as $scope => $terms) {
      if (strpos($scope, 'negate_') === 0) {
        $ccl .= 'not ' . $qualifiers[substr($scope, 7)] . "($terms) ";
      }
      else {
        $ccl .= $group_ops[$scope] . ' ' . $qualifiers[$scope] . "($terms) ";
      }
    }

    // for AGL the 'days' form element was converted to represent the year
    $days = $search_array['days'];
    if ($days) {
      $ccl .= "and year=($days)";
    }

    // remove any preceding 'and' or 'or' (whole word only)
    $ccl = preg_replace('/^\s*(and|or)\b/', '', $ccl);

    return trim($ccl);
  }
  /**
   * Pager callback that returns the number of records matching the search.
   *
   * This function is used as the count callback of tripal_pager_callback().
   * It runs the RPN query on the YAZ connection that
   * tripal_pub_remote_search_AGL() stored in the session, and stores the hit
   * count in the session under 'Count' for use by tripal_pub_AGL_range().
   *
   * @param $search_array
   *   The search settings; only 'search_string' (the RPN query) is read.
   *
   * @return
   *   The number of hits, or 0 when the search could not be performed.
   */
  function tripal_pub_AGL_count($search_array) {
    $search_str = $search_array['search_string'];

    if (empty($_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection'])) {
      drupal_set_message("ERROR preparing search at AGL: no open connection", "error");
      return 0;
    }
    $yazc = $_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection'];

    //yaz_sort($yazc, "1=31 id"); // sort by publication date descending

    // yaz_search() only prepares the request; yaz_wait() performs it
    $failed_stage = '';
    if (!yaz_search($yazc, "rpn", $search_str)) {
      $failed_stage = 'preparing';
    }
    elseif (!yaz_wait()) {
      $failed_stage = 'waiting on';
    }
    if ($failed_stage) {
      $error_no = yaz_errno($yazc);
      $error_msg = yaz_error($yazc);
      $additional = yaz_addinfo($yazc);
      if ($additional != $error_msg) {
        $error_msg .= " $additional";
      }
      drupal_set_message("ERROR $failed_stage search at AGL: ($error_no) $error_msg", "error");
      return 0;
    }

    // get the total number of results from the search
    $count = yaz_hits($yazc);
    $_SESSION['tripal_pub_AGL_query'][$search_str]['Count'] = $count;
    return $count;
  }
  /**
   * Pager callback that returns the publications within a range of results.
   *
   * This function is used as the range callback of tripal_pager_callback().
   * It uses the YAZ connection and hit count that were stored in the session
   * by tripal_pub_remote_search_AGL() and tripal_pub_AGL_count().
   *
   * @param $search_array
   *   The search settings; 'search_string' and 'limit' are read.
   * @param $start
   *   Zero-based offset of the first record to return.
   * @param $limit
   *   Number of records to return. When $search_array['limit'] is set it
   *   takes precedence over this argument.
   *
   * @return
   *   An array with one entry per record, each the result of
   *   tripal_pub_AGL_parse_pubxml(). Empty when there is nothing to fetch
   *   or the retrieval fails.
   */
  function tripal_pub_AGL_range($search_array, $start = 0, $limit = 10) {
    $pubs = array();
    $search_str = $search_array['search_string'];
    if (isset($search_array['limit'])) {
      $limit = $search_array['limit'];
    }

    $query = array();
    if (isset($_SESSION['tripal_pub_AGL_query'][$search_str])) {
      $query = $_SESSION['tripal_pub_AGL_query'][$search_str];
    }
    if (empty($query['yaz_connection'])) {
      return $pubs;
    }
    $yazc = $query['yaz_connection'];
    $count = isset($query['Count']) ? $query['Count'] : 0;

    // never ask for records beyond the end of the result set
    if ($start + $limit > $count) {
      $limit = $count - $start;
    }
    if ($limit <= 0) {
      return $pubs;
    }

    // yaz_range() takes a 1-based start position and a number of records;
    // yaz_present() prepares the retrieval and yaz_wait() carries it out
    yaz_range($yazc, $start + 1, $limit);
    $failed = FALSE;
    if (!yaz_present($yazc)) {
      $failed = TRUE;
    }
    elseif (!yaz_wait()) {
      $failed = TRUE;
    }
    if ($failed) {
      $error_no = yaz_errno($yazc);
      $error_msg = yaz_error($yazc);
      $additional = yaz_addinfo($yazc);
      if ($additional != $error_msg) {
        $error_msg .= " $additional";
      }
      drupal_set_message("ERROR waiting on search at AGL: ($error_no) $error_msg", "error");
      return $pubs;
    }

    for ($i = $start; $i < $start + $limit; $i++) {
      // record positions are 1-based
      $pub_xml = yaz_record($yazc, $i + 1, 'xml; charset=marc-8,utf-8');
      $pubs[] = tripal_pub_AGL_parse_pubxml($pub_xml);
    }
    return $pubs;
  }
  /**
   * Converts the MARC XML of one AGL record into a publication array.
   *
   * Description of XML format:
   * http://www.loc.gov/marc/bibliographic/bdsummary.html
   *
   * The following fields are read; every other control field or data field
   * is skipped:
   *   - 001: control number              -> 'Publication Accession'
   *   - 008: fixed length data elements  -> 'Year', 'Publication Date',
   *          'Published Location', 'Language Abbr'
   *   - 035 $a: system control number    -> 'Publication Accession'
   *   - 040 $a: cataloging source        -> 'Publication Database'
   *   - 100 / 700: personal names        -> 'Author List'
   *   - 110 / 710 $a $b: corporate names -> 'Author List' ('Collective')
   *   - 245 $a $b $h: title statement    -> 'Title', 'Publication Model'
   *   - 260 $a $b $c: imprint            -> 'Published Location',
   *          'Publisher', 'Publication Date'
   *   - 300 $a: physical description     -> 'Pages'
   *   - 500 $a: general note             -> 'Notes'
   *   - 520 $a: summary                  -> 'Abstract'
   *   - 650 / 653 $a: subject terms      -> 'Keywords'
   *   - 773 $t $g $p $z: host item entry -> 'Journal Name', 'Year',
   *          'Publication Date', 'Volume', 'Issue', 'Journal Abbreviation',
   *          'ISBN'
   *   - 856 $u: electronic location      -> 'URL'
   *
   * When both an accession and a database were found they are merged into
   * 'Publication Dbxref' ("database:accession") and removed.
   *
   * @param $pub_xml
   *   The XML of a single record as returned by yaz_record().
   *
   * @return
   *   The publication array, including 'Authors', 'Citation' (built by
   *   tripal_pub_create_citation()) and 'raw' (the unmodified XML). An empty
   *   array is returned when $pub_xml is empty.
   */
  function tripal_pub_AGL_parse_pubxml($pub_xml) {
    $pub = array();
    if (!$pub_xml) {
      return $pub;
    }

    // read the XML and iterate through it.
    $xml = new XMLReader();
    $xml->xml($pub_xml);
    while ($xml->read()) {
      if ($xml->nodeType != XMLReader::ELEMENT) {
        continue;
      }
      $element = $xml->name;

      if ($element == 'controlfield') {
        $tag = $xml->getAttribute('tag');
        // readString() returns the text of the element without moving the
        // reader, so an empty control field cannot swallow the next node
        $value = $xml->readString();
        switch ($tag) {
          case '001': // control number
            $pub['Publication Accession'] = $value;
            break;

          case '008': // fixed length data elements
            $month_names = array(
              '01' => 'Jan', '02' => 'Feb', '03' => 'Mar',
              '04' => 'Apr', '05' => 'May', '06' => 'Jun',
              '07' => 'Jul', '08' => 'Aug', '09' => 'Sep',
              '10' => 'Oct', '11' => 'Nov', '12' => 'Dec'
            );
            $date1 = substr($value, 7, 4);  // year of publication
            $date2 = substr($value, 11, 4); // month and day of publication
            $place = substr($value, 15, 3);
            $lang = substr($value, 35, 3);
            if (preg_match('/\d\d\d\d/', $date1)) {
              $pub['Year'] = $date1;
              $pub['Publication Date'] = $date1;
            }
            // only treat the second date as month/day when it starts with a
            // known month number; the day is its last two characters
            $date_parts = array();
            if (preg_match('/^(\d\d)(\d\d)?/', $date2, $date_parts) and isset($month_names[$date_parts[1]])) {
              $full_date = $date1 . " " . $month_names[$date_parts[1]];
              if (isset($date_parts[2])) {
                $full_date .= " " . $date_parts[2];
              }
              $pub['Publication Date'] = $full_date;
            }
            if ($place and !preg_match('/\s+/', $place)) {
              $pub['Published Location'] = $place;
            }
            if ($lang and !preg_match('/\s+/', $lang)) {
              $pub['Language Abbr'] = $lang;
            }
            break;

          default: // unhandled tag
            break;
        }
      }
      elseif ($element == 'datafield') {
        $tag = $xml->getAttribute('tag');
        $ind1 = $xml->getAttribute('ind1');
        switch ($tag) {
          case '035': // System Control Number
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Publication Accession'] = $codes['a'];
            }
            // without this break the next data field would be consumed by
            // the '040' handler
            break;

          case '040': // Cataloging Source (NR)
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) { // original cataloging agency
              $pub['Publication Database'] = $codes['a'];
            }
            break;

          case '100': // main entry-personal name
          case '700': // Added Entry-Personal Name
            $author = tripal_pub_remote_search_AGL_get_author($xml, $ind1);
            if ($author) {
              $pub['Author List'][] = $author;
            }
            break;

          case '110': // main entry-corporate name
          case '710': // Added Entry-Corporate Name
            $author = array();
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) { // Corporate name or jurisdiction name as entry element
              $author['Collective'] = $codes['a'];
            }
            if (isset($codes['b'])) { // Subordinate unit
              $author['Collective'] = (isset($author['Collective']) ? $author['Collective'] . ' ' : '') . $codes['b'];
            }
            if ($author) {
              $pub['Author List'][] = $author;
            }
            break;

          case '245': // title statement
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Title'] = preg_replace('/\.$/', '', $codes['a']);
            }
            if (isset($codes['b'])) {
              $pub['Title'] = (isset($pub['Title']) ? $pub['Title'] . ' ' : '') . $codes['b'];
            }
            if (isset($codes['h'])) {
              $pub['Publication Model'] = $codes['h'];
            }
            break;

          case '260': // publication, distribution, etc (imprint)
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Published Location'] = $codes['a'];
            }
            if (isset($codes['b'])) {
              $pub['Publisher'] = $codes['b'];
            }
            if (isset($codes['c'])) {
              $pub['Publication Date'] = $codes['c'];
            }
            break;

          case '300': // physical description
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pages = preg_replace('/^p\. /', '', $codes['a']);
              $pages = preg_replace('/\.$/', '', $pages);
              // a value ending in 'p' is the number of pages, not the page
              // numbers, so it is skipped
              if (!preg_match('/p$/', $pages)) {
                $pub['Pages'] = $pages;
              }
            }
            break;

          case '500': // general note
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Notes'] = $codes['a'];
            }
            break;

          case '520': // Summary, etc
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Abstract'] = $codes['a'];
            }
            break;

          case '650': // Subject Added Entry-Topical Term
          case '653': // Index Term-Uncontrolled
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['a'])) {
              $pub['Keywords'][] = $codes['a'];
            }
            break;

          case '773': // host item entry
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['t'])) {
              $pub['Journal Name'] = preg_replace('/\.$/', '', $codes['t']);
            }
            if (isset($codes['g'])) {
              $related = $codes['g'];
              $matches = array();
              // the date can appear in several layouts; the first layout
              // that matches wins
              if (preg_match('/^(\d\d\d\d)/', $related, $matches)) {
                $pub['Year'] = $matches[1];
                $pub['Publication Date'] = $matches[1];
              }
              elseif (preg_match('/(.*?)(\.|\s+)\s*(\d+),\s(\d\d\d\d)/', $related, $matches)) {
                $pub_year = $matches[4];
                $pub_month = $matches[1];
                $pub_day = $matches[3];
                $pub['Year'] = $pub_year;
                $pub['Publication Date'] = "$pub_year $pub_month $pub_day";
              }
              elseif (preg_match('/\((.*?)(\.|\s+)(\d\d\d\d)\)/', $related, $matches)) {
                $pub_year = $matches[3];
                $pub_month = $matches[1];
                $pub['Year'] = $pub_year;
                $pub['Publication Date'] = "$pub_year $pub_month";
              }
              elseif (preg_match('/^(.*?) (\d\d\d\d)/', $related, $matches)) {
                $pub_year = $matches[2];
                $pub_month = $matches[1];
                $pub['Year'] = $pub_year;
                $pub['Publication Date'] = "$pub_year $pub_month";
              }
              if (preg_match('/v\. (.*?)(,|\s+)/', $related, $matches)) {
                $pub['Volume'] = $matches[1];
              }
              if (preg_match('/v\. (.*?)(,|\s+)\((.*?)\)/', $related, $matches)) {
                $pub['Volume'] = $matches[1];
                $pub['Issue'] = $matches[3];
              }
              if (preg_match('/no\. (.*?)(\s|$)/', $related, $matches)) {
                $pub['Issue'] = $matches[1];
              }
            }
            if (isset($codes['p'])) {
              $pub['Journal Abbreviation'] = $codes['p'];
            }
            if (isset($codes['z'])) {
              $pub['ISBN'] = $codes['z'];
            }
            break;

          case '856': // Electronic Location and Access
            $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
            if (isset($codes['u'])) {
              $pub['URL'] = $codes['u'];
            }
            break;

          default:
            // unhandled data field: its subfields are skipped by the
            // enclosing loop
            break;
        }
      }
    }
    $xml->close();

    // build the Dbxref
    if (!empty($pub['Publication Accession']) and !empty($pub['Publication Database'])) {
      $pub['Publication Dbxref'] = $pub['Publication Database'] . ":" . $pub['Publication Accession'];
      unset($pub['Publication Accession']);
      unset($pub['Publication Database']);
    }

    // build the full authors list
    $author_names = array();
    if (!empty($pub['Author List'])) {
      foreach ($pub['Author List'] as $author) {
        if (isset($author['valid']) and $author['valid'] == 'N') {
          // skip non-valid entries. A non-valid entry should have
          // a corresponding corrected entry so we can safely skip it.
          continue;
        }
        if (!empty($author['Collective'])) {
          $author_names[] = $author['Collective'];
          continue;
        }
        $surname = isset($author['Surname']) ? $author['Surname'] : '';
        $initials = isset($author['First Initials']) ? $author['First Initials'] : '';
        $name = trim($surname . ' ' . $initials);
        // entries that only carry a given name are listed by that name
        if ($name === '' and !empty($author['Given Name'])) {
          $name = trim($author['Given Name']);
        }
        if ($name !== '') {
          $author_names[] = $name;
        }
      }
    }
    $pub['Authors'] = implode(', ', $author_names);

    // build the citation
    $pub['Citation'] = tripal_pub_create_citation($pub);
    $pub['raw'] = $pub_xml;

    return $pub;
  }
  /**
   * Collects the subfields of the data field the reader is positioned on.
   *
   * The reader is advanced until the closing tag of the data field (or the
   * end of the document) is reached.
   *
   * @param $xml
   *   An XMLReader positioned on a 'datafield' element.
   *
   * @return
   *   An array of subfield values keyed by subfield code. When a code occurs
   *   more than once within the data field the last value is kept. Empty
   *   subfields yield an empty string.
   */
  function tripal_pub_remote_search_AGL_get_subfield($xml) {
    $codes = array();

    // a self-closing data field has no subfields and no closing tag, so
    // reading on would consume the following data field
    if ($xml->nodeType == XMLReader::ELEMENT and $xml->isEmptyElement) {
      return $codes;
    }

    while ($xml->read()) {
      $sub_element = $xml->name;
      // when we've reached the end of the datafield element then stop
      if ($xml->nodeType == XMLReader::END_ELEMENT and $sub_element == 'datafield') {
        return $codes;
      }
      // if inside the subfield element then get the code and its text.
      // readString() does not move the reader, so an empty subfield cannot
      // cause the next node to be skipped.
      if ($xml->nodeType == XMLReader::ELEMENT and $sub_element == 'subfield') {
        $code = $xml->getAttribute('code');
        $codes[$code] = $xml->readString();
      }
    }
    return $codes;
  }
  /**
   * Builds an author array from a personal name data field.
   *
   * Only subfield 'a' (the name) is used. How the name is split depends on
   * the first indicator of the data field:
   *   - '0': the value is stored as-is in 'Given Name'.
   *   - '1': the value is "surname, given names"; the text before the first
   *     comma is stored in 'Surname', the remainder in 'Given Name', and the
   *     first character of each given name in 'First Initials'.
   *   - '3' (a family name) and any other value: nothing is stored.
   *
   * @param $xml
   *   An XMLReader positioned on the 'datafield' element of the name.
   * @param $ind1
   *   The value of the 'ind1' attribute of that data field.
   *
   * @return
   *   The author array; empty when there is no subfield 'a' or the indicator
   *   is not handled.
   */
  function tripal_pub_remote_search_AGL_get_author($xml, $ind1) {
    $author = array();
    $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
    if (!isset($codes['a'])) {
      return $author;
    }

    // remove any trailing commas
    $value = preg_replace('/,$/', '', $codes['a']);

    // compare the indicator as a string so that a blank or missing indicator
    // is never mistaken for '0'
    $ind1 = trim((string) $ind1);

    if ($ind1 === '0') { // Given Name is first
      $author['Given Name'] = $value;
    }
    elseif ($ind1 === '1') { // Surname is first
      // split the parts of the name using a comma
      $names = explode(',', $value);
      $author['Surname'] = array_shift($names);

      // everything after the surname makes up the given names
      $first_names = preg_split('/\s+/', implode(' ', $names), -1, PREG_SPLIT_NO_EMPTY);
      $author['Given Name'] = implode(' ', $first_names);

      $author['First Initials'] = '';
      foreach ($first_names as $name) {
        // take a whole character, not a single byte, when mbstring is available
        if (function_exists('mb_substr')) {
          $author['First Initials'] .= mb_substr($name, 0, 1, 'UTF-8');
        }
        else {
          $author['First Initials'] .= substr($name, 0, 1);
        }
      }
    }

    return $author;
  }