agricola.inc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. <?php
  2. /**
  3. * Installation:
  4. * 1) Install the yaz libraries: sudo apt-get install yaz libyaz3 libyaz3-dev
  5. * 2) Install the PHP module: sudo pecl install yaz
  6. * 3) Add "extension=yaz.so" to php.ini
  7. * 4) Restart apache
  8. *
  9. * http://agricola.nal.usda.gov/help/z3950.html
  10. *
  11. */
  /**
   * Searches the AGRICOLA article citation database over Z39.50 (YAZ).
   *
   * Builds an RPN/PQF query from the search criteria, prepares a YAZ
   * connection, and hands the paging work to tripal_pager_callback(), which
   * is given tripal_pub_AGRICOLA_range and tripal_pub_AGRICOLA_count as its
   * callbacks. The connection is shared with those callbacks through
   * $_SESSION['tripal_pub_AGRICOLA_query'][<query string>]['yaz_connection'].
   *
   * NOTE(review): only the 'title' scope is implemented. The 'author',
   * 'abstract' and 'id' scopes contribute nothing to the query, and a title
   * criterion replaces whatever was built before it (including any operator
   * text), so effectively only the last title criterion is searched. That
   * behaviour is unchanged here.
   *
   * @param $search_array
   *   Search settings. Reads 'num_criteria' and, for each entry of
   *   'criteria', the keys 'search_terms', 'scope' and 'operation'.
   * @param $num_to_retrieve
   *   The number of publications to show per page.
   * @param $pager_id
   *   The pager identifier passed through to tripal_pager_callback().
   *
   * @return
   *   Whatever tripal_pager_callback() returns for the requested page, or an
   *   empty array when the YAZ connection could not be prepared.
   */
  function tripal_pub_remote_search_AGRICOLA($search_array, $num_to_retrieve, $pager_id) {
    $num_criteria = $search_array['num_criteria'];

    // Quick reference for the PQF attribute types
    // (eg: "@attr 2=" refers to the Relation attribute):
    //   1 = Use Field
    //   2 = Relation
    //   3 = Position
    //   4 = Structure
    //   5 = Truncate
    //   6 = Completeness
    // The attribute set used by AGRICOLA can be found at the bottom of this page:
    // http://agricola.nal.usda.gov/help/z3950.html
    //   1003 = Author
    //   4    = Title
    $search_str = '';
    for ($i = 0; $i <= $num_criteria; $i++) {
      // The loop range may include indexes that were never filled in, so skip
      // them instead of reading undefined offsets.
      if (!isset($search_array['criteria'][$i])) {
        continue;
      }
      $criterion = $search_array['criteria'][$i];
      $search_terms = isset($criterion['search_terms']) ? $criterion['search_terms'] : '';
      $scope = isset($criterion['scope']) ? $criterion['scope'] : '';
      $op = isset($criterion['operation']) ? $criterion['operation'] : '';

      if ($op) {
        $search_str .= "$op";
      }
      if ($scope == 'title') {
        // Escape backslashes and double quotes so that user-supplied terms
        // cannot terminate the quoted PQF term early.
        $quoted_terms = addcslashes($search_terms, "\"\\");
        $search_str = "@attr 1=4 \"$quoted_terms\"";
      }
      // The 'author', 'abstract' and 'id' scopes are not implemented yet.
    }

    $search_array['limit'] = $num_to_retrieve;
    $search_array['search_string'] = $search_str;

    // yaz_connect() prepares for a connection to a Z39.50 server. It is
    // non-blocking and does not attempt to establish a connection - it merely
    // prepares a connect to be performed later when yaz_wait() is called.
    // Port 7190 is the NAL Article Citation Database; the NAL Catalog is
    // available at 'agricola.nal.usda.gov:7090/voyager'.
    $yazc = yaz_connect('agricola.nal.usda.gov:7190/voyager');
    if (!$yazc) {
      // Without a connection handle none of the yaz_*() calls below can work.
      return array();
    }
    $_SESSION['tripal_pub_AGRICOLA_query'][$search_str]['yaz_connection'] = $yazc;

    // Use the USMARC record type. OPAC is also supported by AGRICOLA.
    yaz_syntax($yazc, "usmarc");

    // Get the list of pubs using the search terms but with a Drupal style pager.
    $pubs = tripal_pager_callback('tripal_pub_AGRICOLA_range', $num_to_retrieve, $pager_id,
      'tripal_pub_AGRICOLA_count', $search_array);

    // Close the connection and drop the handle from the session.
    unset($_SESSION['tripal_pub_AGRICOLA_query'][$search_str]['yaz_connection']);
    yaz_close($yazc);

    return $pubs;
  }
  /**
   * Pager count callback for tripal_pager_callback().
   *
   * Runs the RPN query stored in $search_array['search_string'] on the YAZ
   * connection kept in $_SESSION['tripal_pub_AGRICOLA_query'] and returns the
   * total number of matching records. The count is also cached in the session
   * under the 'Count' key, where tripal_pub_AGRICOLA_range() reads it.
   *
   * @param $search_array
   *   The search settings; only 'search_string' is read.
   *
   * @return
   *   The number of hits, or 0 when the search reported an error.
   */
  function tripal_pub_AGRICOLA_count($search_array) {
    $search_str = $search_array['search_string'];
    $yazc = $_SESSION['tripal_pub_AGRICOLA_query'][$search_str]['yaz_connection'];

    // yaz_search() only prepares the search; yaz_wait() performs the network
    // activity.
    yaz_search($yazc, "rpn", $search_str);
    yaz_wait();

    // yaz_error() returns an empty string when the last operation succeeded.
    if (yaz_error($yazc) != '') {
      $count = 0;
    }
    else {
      // get the total number of results from the search
      $count = yaz_hits($yazc);
    }

    $_SESSION['tripal_pub_AGRICOLA_query'][$search_str]['Count'] = $count;
    return $count;
  }
  /**
   * Pager range callback for tripal_pager_callback().
   *
   * Returns the parsed publications for one page of results. It relies on the
   * YAZ connection and the hit count that were stored in
   * $_SESSION['tripal_pub_AGRICOLA_query'] for this search string.
   *
   * @param $search_array
   *   The search settings. Reads 'search_string' and, when present, 'limit'
   *   (which takes precedence over the $limit argument, as before).
   * @param $start
   *   Zero-based offset of the first record of the page.
   * @param $limit
   *   Maximum number of records to return.
   *
   * @return
   *   An array of publication arrays as produced by
   *   tripal_pub_AGRICOLA_parse_pubxml(); empty when there is nothing to
   *   fetch.
   */
  function tripal_pub_AGRICOLA_range($search_array, $start = 0, $limit = 10) {
    $pubs = array();

    $search_str = $search_array['search_string'];
    if (isset($search_array['limit'])) {
      $limit = $search_array['limit'];
    }

    $query = array();
    if (isset($_SESSION['tripal_pub_AGRICOLA_query'][$search_str])) {
      $query = $_SESSION['tripal_pub_AGRICOLA_query'][$search_str];
    }
    if (empty($query['yaz_connection'])) {
      // No prepared connection for this search, so nothing can be fetched.
      return $pubs;
    }
    $yazc = $query['yaz_connection'];
    $count = isset($query['Count']) ? $query['Count'] : 0;

    // Never ask for records beyond the end of the result set.
    if ($start + $limit > $count) {
      $limit = $count - $start;
    }
    if ($limit <= 0) {
      return $pubs;
    }

    // YAZ record positions are 1-based. The range has to be set before
    // yaz_present(), and yaz_wait() carries out the prepared request.
    yaz_range($yazc, $start + 1, $limit);
    yaz_present($yazc);
    yaz_wait();

    for ($i = $start; $i < $start + $limit; $i++) {
      $pub_xml = yaz_record($yazc, $i + 1, 'xml');
      $pubs[] = tripal_pub_AGRICOLA_parse_pubxml($pub_xml);
    }
    return $pubs;
  }
  /**
   * Parses a single MARCXML record into a publication array.
   *
   * Description of the MARC bibliographic fields:
   * http://www.loc.gov/marc/bibliographic/bdsummary.html
   *
   * Only the data fields listed in the switch below are used; control fields
   * (001-008) and every other data field are ignored.
   *
   * @param $pub_xml
   *   The XML text of one record.
   *
   * @return
   *   An empty array when $pub_xml is empty. Otherwise an associative array
   *   that always has 'Authors', 'Citation' and 'raw', plus whichever of
   *   these the record provides: 'Author List', 'Title', 'Publication Model',
   *   'Published Location', 'Publisher', 'Publication Date', 'Pages',
   *   'Abstract', 'Journal Name', 'Journal Abbreviation', 'Year', 'Volume',
   *   'Issue'.
   */
  function tripal_pub_AGRICOLA_parse_pubxml($pub_xml) {
    $pub = array();

    if (!$pub_xml) {
      return $pub;
    }

    // read the XML and iterate through it.
    $xml = new XMLReader();
    $xml->xml($pub_xml);
    while ($xml->read()) {
      if ($xml->nodeType != XMLReader::ELEMENT or $xml->name != 'datafield') {
        continue;
      }
      $tag = $xml->getAttribute('tag');
      $ind1 = $xml->getAttribute('ind1');

      switch ($tag) {
        case '100': // main entry-personal name
        case '700': // added entry-personal name
          $pub['Author List'][] = tripal_pub_remote_search_AGRICOLA_get_author($xml, $ind1);
          break;

        case '110': // main entry-corporate name
        case '710': // added entry-corporate name
          $pub['Author List'][] = _tripal_pub_AGRICOLA_get_corporate_author($xml);
          break;

        case '245': // title statement
          $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
          if (isset($codes['a'])) {
            $pub['Title'] = preg_replace('/\.$/', '', $codes['a']);
          }
          if (isset($codes['b'])) {
            // the remainder of the title is appended to the main title
            $pub['Title'] = isset($pub['Title']) ? $pub['Title'] . ' ' . $codes['b'] : $codes['b'];
          }
          if (isset($codes['h'])) {
            $pub['Publication Model'] = $codes['h'];
          }
          break;

        case '260': // publication, distribution, etc (imprint)
          $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
          if (isset($codes['a'])) {
            $pub['Published Location'] = $codes['a'];
          }
          if (isset($codes['b'])) {
            $pub['Publisher'] = $codes['b'];
          }
          if (isset($codes['c'])) {
            $pub['Publication Date'] = $codes['c'];
          }
          break;

        case '300': // physical description
          $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
          if (isset($codes['a'])) {
            $pages = preg_replace('/^p\. /', '', $codes['a']);
            $pages = preg_replace('/\.$/', '', $pages);
            // a value ending in "p" is the number of pages, not the page
            // numbers, so it is skipped
            if (!preg_match('/p$/', $pages)) {
              $pub['Pages'] = $pages;
            }
          }
          break;

        case '520': // summary, etc
          $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
          if (isset($codes['a'])) {
            $pub['Abstract'] = $codes['a'];
          }
          break;

        case '773': // host item entry
          $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
          if (isset($codes['t'])) {
            $pub['Journal Name'] = preg_replace('/\.$/', '', $codes['t']);
          }
          if (isset($codes['g'])) {
            foreach (_tripal_pub_AGRICOLA_parse_related_parts($codes['g']) as $key => $part) {
              $pub[$key] = $part;
            }
          }
          if (isset($codes['p'])) {
            $pub['Journal Abbreviation'] = $codes['p'];
          }
          break;

        // Recognized but currently unused data fields: 111, 130, 210, 222,
        // 240, 242, 243, 246, 247, 250, 254, 255, 256, 257, 258, 263, 264,
        // 270 and 490. They, and any other tag, are ignored.
        default:
          break;
      }
    }
    $xml->close();

    // build the full authors list
    $author_names = array();
    $author_list = isset($pub['Author List']) ? $pub['Author List'] : array();
    foreach ($author_list as $author) {
      if (isset($author['valid']) and $author['valid'] == 'N') {
        // skip non-valid entries. A non-valid entry should have
        // a corresponding corrected entry so we can safely skip it.
        continue;
      }
      if (!empty($author['Collective'])) {
        $author_names[] = $author['Collective'];
        continue;
      }
      $surname = isset($author['Surname']) ? $author['Surname'] : '';
      $initials = isset($author['First Initials']) ? $author['First Initials'] : '';
      $name = trim($surname . ' ' . $initials);
      if ($name === '' and !empty($author['Given Name'])) {
        // entries entered forename first have no surname to show
        $name = trim($author['Given Name']);
      }
      if ($name !== '') {
        $author_names[] = $name;
      }
    }
    $pub['Authors'] = implode(', ', $author_names);

    // build the citation
    $pub['Citation'] = tripal_pub_create_citation($pub);
    $pub['raw'] = $pub_xml;

    return $pub;
  }

  /**
   * Reads the subfields of a corporate name field (110 or 710) into an author.
   *
   * @param $xml
   *   An XMLReader positioned on the start tag of the datafield.
   *
   * @return
   *   An array whose 'Collective' key holds the corporate name (subfield a)
   *   followed by the subordinate unit (subfield b) when present. Empty when
   *   neither subfield exists.
   */
  function _tripal_pub_AGRICOLA_get_corporate_author($xml) {
    $author = array();
    $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
    if (isset($codes['a'])) {
      // corporate name or jurisdiction name as entry element
      $author['Collective'] = $codes['a'];
    }
    if (isset($codes['b'])) {
      // subordinate unit
      $author['Collective'] = isset($author['Collective']) ? $author['Collective'] . ' ' . $codes['b'] : $codes['b'];
    }
    return $author;
  }

  /**
   * Extracts date, volume and issue from the "related parts" text of field 773.
   *
   * @param $value
   *   The text of subfield g.
   *
   * @return
   *   An array holding whichever of 'Year', 'Publication Date', 'Volume' and
   *   'Issue' could be recognized in $value.
   */
  function _tripal_pub_AGRICOLA_parse_related_parts($value) {
    $parts = array();
    $matches = array();

    // The date patterns are tried in order and the first match wins.
    if (preg_match('/^(\d\d\d\d)/', $value, $matches)) {
      $parts['Year'] = $matches[1];
      $parts['Publication Date'] = $matches[1];
    }
    elseif (preg_match('/(.*?)(\.|\s+)\s*(\d+),\s(\d\d\d\d)/', $value, $matches)) {
      $year = $matches[4];
      $month = $matches[1];
      $day = $matches[3];
      $parts['Year'] = $year;
      $parts['Publication Date'] = "$year $month $day";
    }
    elseif (preg_match('/\((.*?)(\.|\s+)(\d\d\d\d)\)/', $value, $matches)) {
      $year = $matches[3];
      $month = $matches[1];
      $parts['Year'] = $year;
      $parts['Publication Date'] = "$year $month";
    }
    elseif (preg_match('/^(.*?) (\d\d\d\d)/', $value, $matches)) {
      $year = $matches[2];
      $month = $matches[1];
      $parts['Year'] = $year;
      $parts['Publication Date'] = "$year $month";
    }

    // Volume and issue: the later patterns override the earlier ones.
    if (preg_match('/v\. (.*?)(,|\s+)/', $value, $matches)) {
      $parts['Volume'] = $matches[1];
    }
    if (preg_match('/v\. (.*?)(,|\s+)\((.*?)\)/', $value, $matches)) {
      $parts['Volume'] = $matches[1];
      $parts['Issue'] = $matches[3];
    }
    if (preg_match('/no\. (.*?)(\s|$)/', $value, $matches)) {
      $parts['Issue'] = $matches[1];
    }

    return $parts;
  }
  /**
   * Collects the subfields of the datafield the reader is positioned on.
   *
   * Reading stops at the end tag of the datafield, so the caller's loop can
   * continue with the next field.
   *
   * @param $xml
   *   An XMLReader positioned on the start tag of a datafield element.
   *
   * @return
   *   An array keyed by subfield code whose values are the subfield text. When
   *   a code occurs more than once the last occurrence wins.
   */
  function tripal_pub_remote_search_AGRICOLA_get_subfield($xml) {
    $codes = array();

    // A self-closing datafield has no end tag to stop at; reading on would
    // consume the subfields of the fields that follow it.
    if ($xml->nodeType == XMLReader::ELEMENT and $xml->name == 'datafield' and $xml->isEmptyElement) {
      return $codes;
    }

    while ($xml->read()) {
      $sub_element = $xml->name;

      // when we've reached the end of the datafield element we are done
      if ($xml->nodeType == XMLReader::END_ELEMENT and $sub_element == 'datafield') {
        return $codes;
      }

      // if on a subfield start tag then record its code and text
      if ($xml->nodeType == XMLReader::ELEMENT and $sub_element == 'subfield') {
        $code = $xml->getAttribute('code');
        // readString() returns the element's text without moving the reader,
        // so an empty subfield cannot make us step over the node after it.
        $codes[$code] = $xml->readString();
      }
    }
    return $codes;
  }
  /**
   * Builds an author array from a personal name field (100 or 700).
   *
   * @param $xml
   *   An XMLReader positioned on the start tag of the datafield.
   * @param $ind1
   *   The first indicator of the field: '0' when the name is entered forename
   *   first, '1' when it is entered surname first, '3' for a family name.
   *
   * @return
   *   For '0': an array with 'Given Name'. For '1': an array with 'Surname',
   *   'Given Name' and 'First Initials'. Otherwise, or when the field has no
   *   subfield a, an empty array.
   */
  function tripal_pub_remote_search_AGRICOLA_get_author($xml, $ind1) {
    $author = array();
    $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
    if (!isset($codes['a'])) {
      return $author;
    }

    // remove any trailing commas
    $value = preg_replace('/,$/', '', $codes['a']);

    // Compare the indicator as text: a blank or missing indicator must not be
    // mistaken for '0'.
    $name_type = trim((string) $ind1);

    if ($name_type === '0') {
      // Given name is first: the name is not split into parts.
      $author['Given Name'] = $value;
    }
    elseif ($name_type === '1') {
      // Surname is first: split the parts of the name using a comma
      $names = explode(',', $value);
      $author['Surname'] = $names[0];
      unset($names[0]);

      $author['Given Name'] = '';
      foreach ($names as $name) {
        $author['Given Name'] .= $name . ' ';
      }

      // One initial per space-separated part of the given name. XMLReader
      // hands out UTF-8 text, so take a whole character when mbstring is
      // available.
      $author['First Initials'] = '';
      foreach (explode(' ', $author['Given Name']) as $name) {
        if ($name === '') {
          continue;
        }
        if (function_exists('mb_substr')) {
          $author['First Initials'] .= mb_substr($name, 0, 1, 'UTF-8');
        }
        else {
          $author['First Initials'] .= substr($name, 0, 1);
        }
      }
    }
    // Family names (indicator '3') are not handled yet.

    return $author;
  }