123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447 |
- <?php
- /**
- * Installation:
- * 1) Install the yaz libraries: sudo apt-get install yaz libyaz3 libyaz3-dev
- * 2) Install the PHP module: sudo pecl install yaz
- * 3) Add "extension=yaz.so" to php.ini
- * 4) Restart apache
- *
- * http://agricola.nal.usda.gov/help/z3950.html
- *
- */
/**
 * Searches the AGRICOLA article citation database over Z39.50.
 *
 * Builds an RPN (prefix notation) query from the search criteria, prepares a
 * YAZ connection and fetches one page of publications through
 * tripal_pager_callback().
 *
 * @param $search_array
 *   Search settings. This function reads 'num_criteria' and, for each entry
 *   of 'criteria', the keys 'search_terms', 'scope' and 'operation'. The keys
 *   'limit' and 'search_string' are added before the array is handed to the
 *   pager callbacks.
 * @param $num_to_retrieve
 *   Number of publications per page.
 * @param $pager_id
 *   Pager identifier, passed through to tripal_pager_callback().
 *
 * @return
 *   The value returned by tripal_pager_callback() for the
 *   'tripal_pub_AGRICOLA_range' / 'tripal_pub_AGRICOLA_count' callbacks.
 */
function tripal_pub_remote_search_AGRICOLA($search_array, $num_to_retrieve, $pager_id) {
  $num_criteria = isset($search_array['num_criteria']) ? (int) $search_array['num_criteria'] : 0;

  // Quick reference for the Z39.50 attribute types
  // (eg: "@attr 2=" refers to the Relation attribute):
  //   1 = Use Field
  //   2 = Relation
  //   3 = Position
  //   4 = Structure
  //   5 = Truncate
  //   6 = Completeness
  // The attribute set used by AGRICOLA can be found at the bottom of this page:
  // http://agricola.nal.usda.gov/help/z3950.html
  //
  // Map of search scope => "Use" attribute value (4 = Title, 1003 = Author).
  // The 'abstract' and 'id' scopes have no mapping yet and are skipped.
  $use_attributes = array(
    'title' => 4,
    'author' => 1003,
  );

  // Build the query by iterating through the search criteria. The original
  // bounds (0 .. num_criteria inclusive) are kept so that both 0-based and
  // 1-based criteria arrays work; indexes that do not exist are skipped.
  $search_str = '';
  for ($i = 0; $i <= $num_criteria; $i++) {
    if (!isset($search_array['criteria'][$i])) {
      continue;
    }
    $criterion = $search_array['criteria'][$i];
    $scope = isset($criterion['scope']) ? $criterion['scope'] : '';
    $search_terms = isset($criterion['search_terms']) ? trim($criterion['search_terms']) : '';
    $op = isset($criterion['operation']) ? strtolower(trim($criterion['operation'])) : '';

    if ($search_terms === '' || !isset($use_attributes[$scope])) {
      continue;
    }

    // Escape quotes and backslashes so the terms cannot break out of the
    // quoted RPN term.
    $clause = '@attr 1=' . $use_attributes[$scope] . ' "' . addcslashes($search_terms, "\"\\") . '"';

    if ($search_str === '') {
      // The first usable criterion stands alone; its operator (if any) has
      // nothing to combine with.
      $search_str = $clause;
    }
    else {
      // RPN is prefix notation: the operator precedes both operands.
      // NOTE(review): assumes operations are 'AND', 'OR' or 'NOT' (any case);
      // anything else falls back to 'and' -- confirm against the search form.
      if (!in_array($op, array('and', 'or', 'not'), TRUE)) {
        $op = 'and';
      }
      $search_str = '@' . $op . ' ' . $search_str . ' ' . $clause;
    }
  }

  $search_array['limit'] = $num_to_retrieve;
  $search_array['search_string'] = $search_str;

  // yaz_connect() prepares for a connection to a Z39.50 server. It is
  // non-blocking and does not attempt to establish a connection - it merely
  // prepares a connection to be performed later when yaz_wait() is called.
  //$yazc = yaz_connect('agricola.nal.usda.gov:7090/voyager'); // NAL Catalog
  $yazc = yaz_connect('agricola.nal.usda.gov:7190/voyager'); // NAL Article Citation Database

  // The pager callbacks only receive $search_array, so the connection is
  // shared with them through the session for the duration of this request.
  $_SESSION['tripal_pub_AGRICOLA_query'][$search_str]['yaz_connection'] = $yazc;

  // Use the USMARC record type. OPAC is also supported by AGRICOLA.
  yaz_syntax($yazc, "usmarc");

  // Get the list of pubs using the search terms but with a Drupal style pager.
  $pubs = tripal_pager_callback('tripal_pub_AGRICOLA_range', $num_to_retrieve, $pager_id,
    'tripal_pub_AGRICOLA_count', $search_array);

  // Close the connection and drop the session reference to it.
  unset($_SESSION['tripal_pub_AGRICOLA_query'][$search_str]['yaz_connection']);
  yaz_close($yazc);

  return $pubs;
}
/**
 * Pager count callback for tripal_pager_callback().
 *
 * Runs the search on the YAZ connection stored in the session and returns the
 * total number of hits. The count is also stored in
 * $_SESSION['tripal_pub_AGRICOLA_query'][<search string>]['Count'] so that
 * tripal_pub_AGRICOLA_range() can read it.
 *
 * @param $search_array
 *   Search settings; only 'search_string' (the RPN query) is read here.
 *
 * @return
 *   The number of records matching the query, or 0 when no connection is
 *   available or the search reported an error.
 */
function tripal_pub_AGRICOLA_count($search_array) {
  $search_str = $search_array['search_string'];

  $count = 0;
  if (isset($_SESSION['tripal_pub_AGRICOLA_query'][$search_str]['yaz_connection'])) {
    $yazc = $_SESSION['tripal_pub_AGRICOLA_query'][$search_str]['yaz_connection'];

    // yaz_search() only queues the request; yaz_wait() performs it.
    yaz_search($yazc, "rpn", $search_str);
    yaz_wait();

    // yaz_error() returns an empty string when the last operation succeeded.
    if (!yaz_error($yazc)) {
      // Get the total number of results from the search.
      $count = (int) yaz_hits($yazc);
    }
  }

  $_SESSION['tripal_pub_AGRICOLA_query'][$search_str]['Count'] = $count;
  return $count;
}
/**
 * Pager range callback for tripal_pager_callback().
 *
 * Returns the parsed publications for the requested window of the result set.
 * Relies on tripal_pub_AGRICOLA_count() having run first, because the total
 * hit count is read from the session.
 *
 * @param $search_array
 *   Search settings; 'search_string' is read, and 'limit' (when present)
 *   takes precedence over the $limit argument.
 * @param $start
 *   Zero-based offset of the first record to return.
 * @param $limit
 *   Maximum number of records to return.
 *
 * @return
 *   An array of publication arrays as built by
 *   tripal_pub_AGRICOLA_parse_pubxml(); empty when there is nothing to fetch.
 */
function tripal_pub_AGRICOLA_range($search_array, $start = 0, $limit = 10) {
  $search_str = $search_array['search_string'];
  if (isset($search_array['limit'])) {
    $limit = $search_array['limit'];
  }

  $pubs = array();

  $query = isset($_SESSION['tripal_pub_AGRICOLA_query'][$search_str])
    ? $_SESSION['tripal_pub_AGRICOLA_query'][$search_str]
    : array();
  if (empty($query['yaz_connection']) || empty($query['Count'])) {
    return $pubs;
  }
  $yazc = $query['yaz_connection'];
  $count = (int) $query['Count'];

  // Clamp the window to the available records.
  $start = max(0, (int) $start);
  $limit = (int) $limit;
  if ($start + $limit > $count) {
    $limit = $count - $start;
  }
  if ($limit <= 0) {
    return $pubs;
  }

  // YAZ record positions are 1-based; request only the records of this page.
  yaz_range($yazc, $start + 1, $limit);
  yaz_present($yazc);

  for ($position = $start + 1; $position <= $start + $limit; $position++) {
    $pub_xml = yaz_record($yazc, $position, 'xml');
    $pubs[] = tripal_pub_AGRICOLA_parse_pubxml($pub_xml);
  }
  return $pubs;
}
/**
 * Converts one MARC XML record into a publication array.
 *
 * Description of the MARC bibliographic format:
 * http://www.loc.gov/marc/bibliographic/bdsummary.html
 *
 * Only the following datafields are interpreted; control fields and all other
 * datafields are ignored:
 *   - 100 / 700: personal name            => 'Author List'
 *   - 110 / 710: corporate name           => 'Author List' ('Collective')
 *   - 245: title statement                => 'Title', 'Publication Model'
 *   - 260: publication, distribution, etc => 'Published Location',
 *          'Publisher', 'Publication Date'
 *   - 300: physical description           => 'Pages'
 *   - 520: summary                        => 'Abstract'
 *   - 773: host item entry                => 'Journal Name',
 *          'Journal Abbreviation', 'Year', 'Publication Date', 'Volume',
 *          'Issue'
 *
 * @param $pub_xml
 *   The record as an XML string.
 *
 * @return
 *   An empty array when $pub_xml is empty. Otherwise an array with the keys
 *   listed above (when present in the record) plus 'Authors' (comma separated
 *   author names), 'Citation' (from tripal_pub_create_citation()) and 'raw'
 *   (the unmodified XML).
 */
function tripal_pub_AGRICOLA_parse_pubxml($pub_xml) {
  $pub = array();

  if (!$pub_xml) {
    return $pub;
  }

  // Read the XML and iterate through it. Control fields (001-008) carry
  // nothing that is used here, so only datafield elements are inspected. The
  // reader is deliberately not advanced for ignored elements: doing so would
  // skip the element that follows an empty (self-closing) one.
  $xml = new XMLReader();
  $xml->xml($pub_xml);
  while ($xml->read()) {
    if ($xml->nodeType != XMLReader::ELEMENT || $xml->name != 'datafield') {
      continue;
    }
    $tag = $xml->getAttribute('tag');
    $ind1 = $xml->getAttribute('ind1');

    switch ($tag) {
      case '100': // main entry-personal name
      case '700': // added entry-personal name
        $pub['Author List'][] = tripal_pub_remote_search_AGRICOLA_get_author($xml, $ind1);
        break;

      case '110': // main entry-corporate name
      case '710': // added entry-corporate name
        $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
        $pub['Author List'][] = _tripal_pub_AGRICOLA_corporate_author($codes);
        break;

      case '245': // title statement
        $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
        if (isset($codes['a'])) {
          $pub['Title'] = preg_replace('/\.$/', '', $codes['a']);
        }
        if (isset($codes['b'])) {
          // remainder of title
          $pub['Title'] = isset($pub['Title']) ? $pub['Title'] . ' ' . $codes['b'] : $codes['b'];
        }
        if (isset($codes['h'])) {
          $pub['Publication Model'] = $codes['h'];
        }
        break;

      case '260': // publication, distribution, etc (imprint)
        $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
        if (isset($codes['a'])) {
          $pub['Published Location'] = $codes['a'];
        }
        if (isset($codes['b'])) {
          $pub['Publisher'] = $codes['b'];
        }
        if (isset($codes['c'])) {
          $pub['Publication Date'] = $codes['c'];
        }
        break;

      case '300': // physical description
        $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
        if (isset($codes['a'])) {
          $pages = preg_replace('/^p\. /', '', $codes['a']);
          $pages = preg_replace('/\.$/', '', $pages);
          // A value still ending in "p" is the number of pages, not the page
          // numbers, so it is skipped.
          if (!preg_match('/p$/', $pages)) {
            $pub['Pages'] = $pages;
          }
        }
        break;

      case '520': // summary, etc
        $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
        if (isset($codes['a'])) {
          $pub['Abstract'] = $codes['a'];
        }
        break;

      case '773': // host item entry
        $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
        if (isset($codes['t'])) {
          $pub['Journal Name'] = preg_replace('/\.$/', '', $codes['t']);
        }
        if (isset($codes['g'])) {
          $pub = array_merge($pub, _tripal_pub_AGRICOLA_host_item_parts($codes['g']));
        }
        if (isset($codes['p'])) {
          $pub['Journal Abbreviation'] = $codes['p'];
        }
        break;
    }
  }
  $xml->close();

  // Build the full authors list.
  $names = array();
  if (!empty($pub['Author List'])) {
    foreach ($pub['Author List'] as $author) {
      if (isset($author['valid']) && $author['valid'] == 'N') {
        // Skip non-valid entries. A non-valid entry should have a
        // corresponding corrected entry so it can safely be skipped.
        continue;
      }
      if (!empty($author['Collective'])) {
        $names[] = $author['Collective'];
        continue;
      }
      $surname = isset($author['Surname']) ? $author['Surname'] : '';
      $initials = isset($author['First Initials']) ? $author['First Initials'] : '';
      $name = trim($surname . ' ' . $initials);
      if ($name === '' && isset($author['Given Name'])) {
        // Forename-only entries have no surname to show.
        $name = trim($author['Given Name']);
      }
      if ($name !== '') {
        $names[] = $name;
      }
    }
  }
  $pub['Authors'] = implode(', ', $names);

  // Build the citation.
  $pub['Citation'] = tripal_pub_create_citation($pub);

  $pub['raw'] = $pub_xml;
  return $pub;
}

/**
 * Builds a corporate author entry from the subfields of a 110/710 datafield.
 *
 * @param $codes
 *   Subfield code => value array. 'a' is the corporate or jurisdiction name,
 *   'b' the subordinate unit.
 *
 * @return
 *   An array with a 'Collective' key, or an empty array when neither
 *   subfield is present.
 */
function _tripal_pub_AGRICOLA_corporate_author($codes) {
  $author = array();
  if (isset($codes['a'])) {
    $author['Collective'] = $codes['a'];
  }
  if (isset($codes['b'])) {
    $author['Collective'] = isset($author['Collective'])
      ? $author['Collective'] . ' ' . $codes['b']
      : $codes['b'];
  }
  return $author;
}

/**
 * Extracts date, volume and issue from the 773 $g (related parts) subfield.
 *
 * @param $value
 *   The free-text subfield value.
 *
 * @return
 *   An array holding whichever of 'Year', 'Publication Date', 'Volume' and
 *   'Issue' could be recognised.
 */
function _tripal_pub_AGRICOLA_host_item_parts($value) {
  $parts = array();
  $matches = array();

  // Date: the first pattern that matches wins.
  if (preg_match('/^(\d\d\d\d)/', $value, $matches)) {
    // value starts with a four digit year
    $parts['Year'] = $matches[1];
    $parts['Publication Date'] = $matches[1];
  }
  elseif (preg_match('/(.*?)(\.|\s+)\s*(\d+),\s(\d\d\d\d)/', $value, $matches)) {
    // "<month> <day>, <year>"
    $year = $matches[4];
    $month = $matches[1];
    $day = $matches[3];
    $parts['Year'] = $year;
    $parts['Publication Date'] = "$year $month $day";
  }
  elseif (preg_match('/\((.*?)(\.|\s+)(\d\d\d\d)\)/', $value, $matches)) {
    // "(<month> <year>)"
    $year = $matches[3];
    $month = $matches[1];
    $parts['Year'] = $year;
    $parts['Publication Date'] = "$year $month";
  }
  elseif (preg_match('/^(.*?) (\d\d\d\d)/', $value, $matches)) {
    // "<month> <year>"
    $year = $matches[2];
    $month = $matches[1];
    $parts['Year'] = $year;
    $parts['Publication Date'] = "$year $month";
  }

  // Volume and issue: later patterns override earlier ones.
  if (preg_match('/v\. (.*?)(,|\s+)/', $value, $matches)) {
    $parts['Volume'] = $matches[1];
  }
  if (preg_match('/v\. (.*?)(,|\s+)\((.*?)\)/', $value, $matches)) {
    $parts['Volume'] = $matches[1];
    $parts['Issue'] = $matches[3];
  }
  if (preg_match('/no\. (.*?)(\s|$)/', $value, $matches)) {
    $parts['Issue'] = $matches[1];
  }
  return $parts;
}
/**
 * Collects the subfields of the datafield the reader is positioned on.
 *
 * Reads forward until the closing tag of the datafield (or the end of the
 * document) and leaves the reader there.
 *
 * @param $xml
 *   An XMLReader positioned on a <datafield> element.
 *
 * @return
 *   An array of subfield code => text value. When a code occurs more than
 *   once within the datafield the last occurrence wins.
 */
function tripal_pub_remote_search_AGRICOLA_get_subfield($xml) {
  $codes = array();

  // A self-closing <datafield/> has no end tag; reading on would consume the
  // subfields of the next datafield instead.
  if ($xml->nodeType == XMLReader::ELEMENT && $xml->name == 'datafield' && $xml->isEmptyElement) {
    return $codes;
  }

  while ($xml->read()) {
    $sub_element = $xml->name;
    // Stop when the end of the datafield element has been reached.
    if ($xml->nodeType == XMLReader::END_ELEMENT && $sub_element == 'datafield') {
      break;
    }
    // Inside a subfield element: record its code and text.
    if ($xml->nodeType == XMLReader::ELEMENT && $sub_element == 'subfield') {
      $code = $xml->getAttribute('code');
      // readString() returns the element's text without advancing the
      // reader, so an empty <subfield/> cannot swallow the node after it.
      $codes[$code] = $xml->isEmptyElement ? '' : $xml->readString();
    }
  }
  return $codes;
}
/**
 * Builds a personal author entry from a 100/700 datafield.
 *
 * Only subfield 'a' (the personal name) is used. How it is interpreted
 * depends on the first indicator of the datafield:
 *   - 0: the name is a forename        => 'Given Name'
 *   - 1: the name is "Surname, Given"  => 'Surname', 'Given Name' and
 *        'First Initials'
 *   - 3: family name                   => not handled yet
 *
 * @param $xml
 *   An XMLReader positioned on the <datafield> element.
 * @param $ind1
 *   The value of the datafield's 'ind1' attribute.
 *
 * @return
 *   The author array; empty when the name is missing or the indicator is not
 *   handled.
 */
function tripal_pub_remote_search_AGRICOLA_get_author($xml, $ind1) {
  $author = array();
  $codes = tripal_pub_remote_search_AGRICOLA_get_subfield($xml);
  if (!isset($codes['a'])) {
    return $author;
  }

  // Remove any trailing comma.
  $value = preg_replace('/,$/', '', $codes['a']);

  // Compare as strings: a loose "== 0" would also match a missing indicator.
  $indicator = trim((string) $ind1);

  if ($indicator === '0') {
    // Forename only.
    $author['Given Name'] = $value;
  }
  elseif ($indicator === '1') {
    // Surname first: split the parts of the name on the comma.
    $parts = array_map('trim', explode(',', $value));
    $author['Surname'] = array_shift($parts);
    $author['Given Name'] = trim(implode(' ', $parts));

    // The first character of every given name forms the initials.
    $author['First Initials'] = '';
    $given_names = preg_split('/\s+/', $author['Given Name'], -1, PREG_SPLIT_NO_EMPTY);
    foreach ($given_names as $given_name) {
      $author['First Initials'] .= function_exists('mb_substr')
        ? mb_substr($given_name, 0, 1, 'UTF-8')
        : substr($given_name, 0, 1);
    }
  }
  // Indicator 3 (family name) is intentionally left unhandled for now.

  return $author;
}
|