<?php

/**
 * @file
 * Importer for the USDA National Agricultural Library (Agricola, "AGL").
 *
 * This file provides support for importing and parsing results from the
 * USDA National Agricultural Library (AGL) database.  The functions here are
 * used by both the publication importer setup form and the publication
 * importer.  The AGL database is a Z39.50 server; it is queried here through
 * the PHP YAZ extension (the yaz_*() functions) and records are retrieved as
 * MARC XML.
 */

/**
 * A hook for altering the publication importer form.
 *
 * It changes the 'Days' element to 'Year' and removes the 'Journal Name'
 * filter.
 *
 * @param $form
 *   The Drupal form array.
 * @param $form_state
 *   The form state array.
 * @param $num_criteria
 *   The number of criteria the user currently has added to the form.
 *
 * @return
 *   The form (Drupal form API).
 *
 * @ingroup tripal_pub
 */
function tripal_pub_remote_alter_form_AGL($form, $form_state, $num_criteria = 1) {
  // So far we haven't been able to get AGL to filter results to only include
  // pubs from the last XX days.  So, we change the 'days' element to be the
  // year to query.
  $form['themed_element']['days']['#title'] = t('Year');
  $form['themed_element']['days']['#description'] = t('Please enter a year to limit records by the year they were published, created or modified in the database.');

  // The Journal Name filter doesn't seem to work, so remove it from the scope
  // drop-down of every criteria row.
  for ($i = 1; $i <= $num_criteria; $i++) {
    unset($form['themed_element']['criteria'][$i]["scope-$i"]['#options']['journal']);
  }

  return $form;
}

/**
 * A hook for providing additional validation of the importer setup form.
 *
 * Checks that the 'days' element (used as a year for AGL) is a four digit
 * year, that every 'id' criterion looks like 'AGL:<digits>' and that at most
 * one 'id' criterion is present.
 *
 * @param $form
 *   The Drupal form array.
 * @param $form_state
 *   The form state array.
 *
 * @return
 *   The form (Drupal form API).
 *
 * @ingroup tripal_pub
 */
function tripal_pub_remote_validate_form_AGL($form, $form_state) {
  $values = $form_state['values'];
  $days = isset($values['days']) ? trim($values['days']) : '';
  $num_criteria = isset($values['num_criteria']) ? $values['num_criteria'] : 0;

  if ($days and !preg_match('/^\d\d\d\d$/', $days)) {
    form_set_error("days", "Please enter a four digit year.");
  }

  $num_ids = 0;
  for ($i = 1; $i <= $num_criteria; $i++) {
    // Only accession ('id') rows need AGL specific validation.
    if ($values["scope-$i"] != 'id') {
      continue;
    }

    $search_terms = trim($values["search_terms-$i"]);
    if (!preg_match('/^AGL:\d+$/', $search_terms)) {
      form_set_error("search_terms-$i", "The AGL accession must be a numeric value, prefixed with 'AGL:' (e.g. AGL:3890740).");
    }

    // Flag only the surplus accession rows, not every row that follows them.
    $num_ids++;
    if ($num_ids > 1) {
      form_set_error("search_terms-$i", "Unfortunately, the AGL importer can only support a single accession at a time. Please remove the others.");
    }
  }

  return $form;
}

/**
 * Builds the CCL query string for an AGL search.
 *
 * To build the CCL search string we want a single entry for each category
 * ('title', 'author', 'abstract', 'journal', 'id' or 'any') and a single entry
 * for the negation of each category.  The search form, however, allows the
 * user to add several rows of the same type.  So the terms of each category
 * (and of its negated counterpart, used when NOT is the row's operation) are
 * collected separately and the categories are then joined in the order in
 * which they first appeared.  The operation of the first row of a category
 * joins that category to the preceding ones; the operation of any later row
 * of the same category is used inside the category.
 *
 * @param $search_array
 *   The search criteria array.  The keys read are 'num_criteria', the
 *   optional 'days' (a year for AGL) and 'criteria', a 1-indexed list of
 *   arrays with the keys 'search_terms', 'scope', 'is_phrase' and
 *   'operation'.
 *
 * @return
 *   The CCL query string.
 */
function _tripal_pub_AGL_build_ccl($search_array) {
  $known_fields = array('title', 'author', 'abstract', 'journal', 'id', 'any');
  $num_criteria = $search_array['num_criteria'];
  $days = array_key_exists('days', $search_array) ? $search_array['days'] : '';

  // Keyed by category ('title', 'negate_title', ...).  PHP arrays keep their
  // insertion order, which is the order the categories must be emitted in.
  $clauses = array();

  for ($i = 1; $i <= $num_criteria; $i++) {
    if (!isset($search_array['criteria'][$i])) {
      continue;
    }
    $criterion = $search_array['criteria'][$i];
    $field = $criterion['scope'];

    // Rows with a scope this importer does not know are ignored.
    if (!in_array($field, $known_fields, TRUE)) {
      continue;
    }

    $search_terms = trim($criterion['search_terms']);
    $op = empty($criterion['operation']) ? '' : strtolower($criterion['operation']);

    if (empty($criterion['is_phrase'])) {
      // Not a phrase: make sure the AND and OR keywords are lower-case.
      $search_terms = str_replace(' OR ', ' or ', $search_terms);
      $search_terms = str_replace(' AND ', ' and ', $search_terms);
    }
    else {
      // A phrase: make sure the search terms are surrounded by quotes.
      $search_terms = "\"$search_terms\"";
    }

    // A 'not' row goes into the negated category.  Inside that category the
    // terms are joined with 'or' because not (a or b) == (not a) and (not b).
    $negated = ($op == 'not');
    if ($negated) {
      $op = 'or';
    }

    // Accessions are entered as 'AGL:123' but searched as '123'.
    if ($field == 'id') {
      $search_terms = preg_replace('/AGL:([^\s]*)/', '$1', $search_terms);
    }

    $key = $negated ? "negate_$field" : $field;
    if (!isset($clauses[$key])) {
      $clauses[$key] = array(
        'field' => $field,
        'negated' => $negated,
        'op' => $op,
        'terms' => "($search_terms) ",
      );
    }
    else {
      $clauses[$key]['terms'] .= "$op ($search_terms) ";
    }
  }

  // Now build the CCL string in order.
  $ccl = '';
  foreach ($clauses as $clause) {
    $joiner = $clause['negated'] ? 'not' : $clause['op'];
    // The 'any' category is not bound to an attribute.
    $target = ($clause['field'] == 'any') ? '' : $clause['field'] . '=';
    $ccl .= $joiner . ' ' . $target . '(' . $clause['terms'] . ') ';
  }

  // For AGL the 'days' form element was converted to represent the year.
  if ($days) {
    $ccl .= "and year=($days)";
  }

  // Remove any preceding 'and' or 'or' keyword.
  return preg_replace('/^\s*(and|or)\b/', '', $ccl);
}

/**
 * Collects the error number and message of the last YAZ operation.
 *
 * @param $yazc
 *   The YAZ connection.
 * @param $with_addinfo
 *   When TRUE the additional error information reported by YAZ is appended
 *   to the message (unless it merely repeats the message).
 *
 * @return
 *   An array with two elements: the error number and the error message.
 */
function _tripal_pub_AGL_yaz_error($yazc, $with_addinfo = TRUE) {
  $error_no = yaz_errno($yazc);
  $error_msg = yaz_error($yazc);
  if ($with_addinfo) {
    $additional = yaz_addinfo($yazc);
    if ($additional != $error_msg) {
      $error_msg .= " $additional";
    }
  }
  return array($error_no, $error_msg);
}

/**
 * A hook for performing the search on the AGL database.
 *
 * @param $search_array
 *   An array containing the search criteria for the search.
 * @param $num_to_retrieve
 *   Indicates the maximum number of publications to retrieve from the remote
 *   database.
 * @param $page
 *   Indicates the page to retrieve.  This corresponds to a paged table, where
 *   each page has $num_to_retrieve publications.
 *
 * @return
 *   An array with the keys 'total_records', 'search_str' and 'pubs' (an array
 *   of publications).
 *
 * @ingroup tripal_pub
 */
function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page) {
  $no_results = array(
    'total_records' => 0,
    'search_str'    => '',
    'pubs'          => array(),
  );

  $ccl = _tripal_pub_AGL_build_ccl($search_array);

  // yaz_connect() prepares for a connection to a Z39.50 server. This function
  // is non-blocking and does not attempt to establish a connection - it merely
  // prepares a connect to be performed later when yaz_wait() is called.
  //
  // The NAL Catalog is at agricola.nal.usda.gov:7090/voyager; the NAL Article
  // Citation Database, which is used here, is at port 7190.
  $yazc = yaz_connect('agricola.nal.usda.gov:7190/voyager');
  if (!$yazc) {
    drupal_set_message('Error preparing the connection to AGL.', "error");
    watchdog('tpub_import', 'Error preparing the connection to AGL.', array(), WATCHDOG_ERROR);
    return $no_results;
  }

  // Use the USMARC record type.  But OPAC is also supported by Agricola.
  yaz_syntax($yazc, "usmarc");

  // The search query is built using CCL, we need to first configure it so it
  // can map the attributes to defined identifiers.  The attribute set used by
  // AGL can be found at the bottom of this page:
  // http://agricola.nal.usda.gov/help/z3950.html
  //
  // More in depth details:  http://www.loc.gov/z3950/agency/bib1.html
  //
  // CCL Syntax: http://www.indexdata.com/yaz/doc/tools.html#CCL
  $fields = array(
    "title"    => "u=4",
    "author"   => "u=1003",
    "abstract" => "u=62",
    "id"       => "u=12",
    "year"     => "u=30 r=o",
    "journal"  => "u=1033",
  );
  yaz_ccl_conf($yazc, $fields);

  $cclresult = array();
  if (!yaz_ccl_parse($yazc, $ccl, $cclresult)) {
    drupal_set_message('Error parsing search string: ' . $cclresult["errorstring"], "error");
    watchdog('tpub_import', 'Error: %errstr', array('%errstr' => $cclresult["errorstring"]), WATCHDOG_ERROR);
    // Do not leak the connection on the error path.
    yaz_close($yazc);
    return $no_results;
  }
  $search_str = $cclresult["rpn"];

  // Get the total number of records.
  $total_records = tripal_pub_AGL_count($yazc, $search_str);

  // Get the pubs in the specified range.
  $start = $page * $num_to_retrieve;
  $results = tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records);

  // Close the connection.
  yaz_close($yazc);

  return $results;
}

/**
 * Retrieves a range of publications from AGL.
 *
 * @param $yazc
 *   The YAZ connection.
 * @param $search_str
 *   The search string to use for searching.
 * @param $start
 *   The zero-based start of the range.
 * @param $num_to_retrieve
 *   The number of publications to retrieve.
 * @param $total_records
 *   The total number of records in the dataset.  This value should have
 *   been retrieved by the tripal_pub_AGL_count() function.
 *
 * @return
 *   An array containing the total_records in the dataset, the search string
 *   and an array of the publications that were retrieved.  On error
 *   'total_records' is 0 and 'pubs' is empty.
 *
 * @ingroup tripal_pub
 */
function tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records) {
  $failure = array(
    'total_records' => 0,
    'search_str'    => $search_str,
    'pubs'          => array(),
  );

  // Never read past the end of the result set.
  if ($start + $num_to_retrieve > $total_records) {
    $num_to_retrieve = $total_records - $start;
  }
  if ($num_to_retrieve <= 0) {
    return array(
      'total_records' => $total_records,
      'search_str'    => $search_str,
      'pubs'          => array(),
    );
  }

  // Only ask the server for the records of the requested page.  Record
  // positions in YAZ are 1-based.
  yaz_range($yazc, $start + 1, $num_to_retrieve);
  if (!yaz_present($yazc)) {
    list($error_no, $error_msg) = _tripal_pub_AGL_yaz_error($yazc);
    drupal_set_message("ERROR waiting on search at AGL: ($error_no) $error_msg", "error");
    watchdog('tpub_import', "ERROR waiting on search at AGL: (%error_no) %error_msg",
      array('%error_no' => $error_no, '%error_msg' => $error_msg), WATCHDOG_ERROR);
    return $failure;
  }

  $pubs = array();
  for ($i = $start; $i < $start + $num_to_retrieve; $i++) {
    // Retrieve the record as MARC XML, converted from MARC-8 to UTF-8.
    $pub_xml = yaz_record($yazc, $i + 1, 'xml; charset=marc-8,utf-8');
    if (!$pub_xml) {
      list($error_no, $error_msg) = _tripal_pub_AGL_yaz_error($yazc, FALSE);
      drupal_set_message("ERROR retrieving records from AGL: ($error_no) $error_msg", "error");
      watchdog('tpub_import', "ERROR retrieving records from AGL: (%error_no) %error_msg",
        array('%error_no' => $error_no, '%error_msg' => $error_msg), WATCHDOG_ERROR);
      return $failure;
    }

    // Parse the pub XML.
    $pubs[] = tripal_pub_AGL_parse_pubxml($pub_xml);
  }

  return array(
    'total_records' => $total_records,
    'search_str'    => $search_str,
    'pubs'          => $pubs,
  );
}

/**
 * Retrieves the total number of publications that match the search string.
 *
 * @param $yazc
 *   The YAZ connection.
 * @param $search_str
 *   The search string (RPN) to use for searching.
 *
 * @return
 *   A count of the total number of publications that match the search
 *   string, or 0 when the search fails.
 *
 * @ingroup tripal_pub
 */
function tripal_pub_AGL_count($yazc, $search_str) {
  if (!yaz_search($yazc, "rpn", $search_str)) {
    list($error_no, $error_msg) = _tripal_pub_AGL_yaz_error($yazc);
    drupal_set_message("ERROR preparing search at AGL: ($error_no) $error_msg", "error");
    watchdog('tpub_import', "ERROR preparing search at AGL: (%error_no) %error_msg",
      array('%error_no' => $error_no, '%error_msg' => $error_msg), WATCHDOG_ERROR);
    return 0;
  }

  // yaz_search() only prepares the search; yaz_wait() performs it.
  if (!yaz_wait()) {
    list($error_no, $error_msg) = _tripal_pub_AGL_yaz_error($yazc);
    drupal_set_message("ERROR waiting on search at AGL: ($error_no) $error_msg", "error");
    watchdog('tpub_import', "ERROR waiting on search at AGL: (%error_no) %error_msg",
      array('%error_no' => $error_no, '%error_msg' => $error_msg), WATCHDOG_ERROR);
    return 0;
  }

  // Get the total number of results from the search.
  return yaz_hits($yazc);
}

/**
 * Decodes HTML entities and removes symbol characters not meant for display.
 *
 * @param $text
 *   The text to clean.
 *
 * @return
 *   The cleaned text.
 */
function _tripal_pub_AGL_clean_text($text) {
  $decoded = html_entity_decode((string) $text, ENT_QUOTES, 'UTF-8');
  $cleaned = preg_replace('/[\p{So}]/u', '', $decoded);
  // preg_replace() returns NULL when the subject is not valid UTF-8; keep the
  // decoded text rather than losing the value altogether.
  return ($cleaned === NULL) ? $decoded : $cleaned;
}

/**
 * Stores the values of a MARC control field in the publication array.
 *
 * @param $pub
 *   The publication array, passed by reference.
 * @param $tag
 *   The tag of the control field.
 * @param $value
 *   The text of the control field.
 */
function _tripal_pub_AGL_parse_controlfield(&$pub, $tag, $value) {
  switch ($tag) {
    case '001':
      // Control number.
      $pub['Publication Accession'] = $value;
      break;

    case '008':
      // Fixed length data elements.
      $months = array(
        '01' => 'Jan', '02' => 'Feb', '03' => 'Mar',
        '04' => 'Apr', '05' => 'May', '06' => 'Jun',
        '07' => 'Jul', '08' => 'Aug', '09' => 'Sep',
        '10' => 'Oct', '11' => 'Nov', '12' => 'Dec',
      );
      // Year of publication.
      $date1 = (string) substr($value, 7, 4);
      // Treated by this importer as the month and day of publication (MMDD).
      $date2 = (string) substr($value, 11, 4);
      $place = (string) substr($value, 15, 3);
      $lang  = (string) substr($value, 35, 3);

      if (preg_match('/^\d{4}$/', $date1)) {
        $pub['Year'] = $date1;
        $pub['Publication Date'] = $date1;
        $matches = array();
        // Only use the second date when it starts with a valid month.
        if (preg_match('/^(\d\d)(\d\d)?/', $date2, $matches) and isset($months[$matches[1]])) {
          $pub['Publication Date'] .= ' ' . $months[$matches[1]];
          if (isset($matches[2]) and $matches[2] !== '00') {
            $pub['Publication Date'] .= ' ' . $matches[2];
          }
        }
      }
      if ($place !== '' and !preg_match('/\s/', $place)) {
        $pub['Published Location'] = $place;
      }
      if ($lang !== '' and !preg_match('/\s/', $lang)) {
        $pub['Language Abbr'] = $lang;
      }
      break;
  }
}

/**
 * Stores the subfields of a MARC 773 (host item entry) field.
 *
 * The subfields are handled in document order because 'a' and 't' write the
 * same keys.
 *
 * @param $pub
 *   The publication array, passed by reference.
 * @param $codes
 *   The subfields as returned by tripal_pub_remote_search_AGL_get_subfield().
 */
function _tripal_pub_AGL_parse_host_item(&$pub, $codes) {
  foreach ($codes as $code => $value) {
    switch ($code) {
      case 'a':
        if (preg_match('/Proceedings/i', $value)) {
          $pub['Series Name'] = preg_replace('/\.$/', '', $value);
          $pub['Publication Type'][0] = 'Conference Proceedings';
        }
        else {
          $pub['Journal Name'] = preg_replace('/\.$/', '', $value);
        }
        break;

      case 't':
        if (preg_match('/Proceedings/i', $value)) {
          $pub['Series Name'] = preg_replace('/\.$/', '', $value);
          $pub['Publication Type'][0] = 'Conference Proceedings';
        }
        $pub['Journal Name'] = preg_replace('/\.$/', '', $value);
        break;

      case 'g':
        // Free text such as "v. 12, no. 3 (Mar. 2005)"; try the known date
        // layouts from the most to the least specific.
        $matches = array();
        if (preg_match('/^(\d\d\d\d)/', $value, $matches)) {
          $pub['Publication Date'] = $matches[1];
        }
        elseif (preg_match('/(.*?)(\.|\s+)\s*(\d+),\s(\d\d\d\d)/', $value, $matches)) {
          $pub['Publication Date'] = $matches[4] . ' ' . $matches[1] . ' ' . $matches[3];
        }
        elseif (preg_match('/\((.*?)(\.|\s+)(\d\d\d\d)\)/', $value, $matches)) {
          $pub['Publication Date'] = $matches[3] . ' ' . $matches[1];
        }
        elseif (preg_match('/^(.*?) (\d\d\d\d)/', $value, $matches)) {
          $pub['Publication Date'] = $matches[2] . ' ' . $matches[1];
        }
        if (preg_match('/v\. (.*?)(,|\s+)/', $value, $matches)) {
          $pub['Volume'] = $matches[1];
        }
        if (preg_match('/v\. (.*?)(,|\s+)\((.*?)\)/', $value, $matches)) {
          $pub['Volume'] = $matches[1];
          $pub['Issue'] = $matches[3];
        }
        if (preg_match('/no\. (.*?)(\s|$)/', $value, $matches)) {
          $pub['Issue'] = $matches[1];
        }
        break;

      case 'p':
        $pub['Journal Abbreviation'] = $value;
        break;

      case 'z':
        $pub['ISBN'] = $value;
        break;
    }
  }
}

/**
 * Stores the values of the MARC data field the reader is positioned on.
 *
 * Data fields with a tag that is not listed here are ignored.
 *
 * @param $pub
 *   The publication array, passed by reference.
 * @param $xml
 *   The XMLReader object, positioned on a 'datafield' element.
 */
function _tripal_pub_AGL_parse_datafield(&$pub, $xml) {
  $tag  = $xml->getAttribute('tag');
  $ind1 = $xml->getAttribute('ind1');

  // NOTE: switch compares loosely, so a tag of '35' also matches '035'.
  switch ($tag) {
    case '035':
      // System control number.
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      if (isset($codes['a'])) {
        $pub['Publication Accession'] = $codes['a'];
      }
      break;

    case '040':
      // Cataloging source: 'a' is the original cataloging agency.
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      if (isset($codes['a'])) {
        $pub['Publication Database'] = $codes['a'];
      }
      break;

    case '100':
      // Main entry - personal name.
    case '700':
      // Added entry - personal name.
      $author = tripal_pub_remote_search_AGL_get_author($xml, $ind1);
      if ($author) {
        $pub['Author List'][] = $author;
      }
      break;

    case '110':
      // Main entry - corporate name.
    case '710':
      // Added entry - corporate name.  'a' is the corporate or jurisdiction
      // name, 'b' the subordinate unit.
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      $parts = array();
      foreach (array('a', 'b') as $code) {
        if (isset($codes[$code])) {
          $parts[] = $codes[$code];
        }
      }
      if ($parts) {
        $pub['Author List'][] = array('Collective' => implode(' ', $parts));
      }
      break;

    case '245':
      // Title statement.
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      if (isset($codes['a'])) {
        $pub['Title'] = trim(preg_replace('/\.$/', '', $codes['a']));
      }
      if (isset($codes['b'])) {
        $pub['Title'] = (isset($pub['Title']) ? $pub['Title'] . ' ' : '') . $codes['b'];
      }
      if (isset($codes['h'])) {
        $pub['Publication Model'] = $codes['h'];
      }
      break;

    case '260':
      // Publication, distribution, etc. (imprint).
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      if (isset($codes['a'])) {
        $pub['Published Location'] = $codes['a'];
      }
      if (isset($codes['b'])) {
        $pub['Publisher'] = $codes['b'];
      }
      if (isset($codes['c'])) {
        $pub['Publication Date'] = $codes['c'];
      }
      break;

    case '300':
      // Physical description.
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      if (isset($codes['a'])) {
        $pages = preg_replace('/^p\. /', '', $codes['a']);
        $pages = preg_replace('/\.$/', '', $pages);
        // A value ending in 'p' is the number of pages, not the page numbers.
        if (!preg_match('/p$/', $pages)) {
          $pub['Pages'] = $pages;
        }
      }
      break;

    case '500':
      // General note.
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      if (isset($codes['a'])) {
        $pub['Notes'] = $codes['a'];
      }
      break;

    case '520':
      // Summary, etc.
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      if (isset($codes['a'])) {
        $pub['Abstract'] = $codes['a'];
      }
      break;

    case '650':
      // Subject added entry - topical term.
    case '653':
      // Index term - uncontrolled.
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      if (isset($codes['a'])) {
        $pub['Keywords'][] = $codes['a'];
      }
      break;

    case '773':
      // Host item entry.
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      _tripal_pub_AGL_parse_host_item($pub, $codes);
      break;

    case '856':
      // Electronic location and access.
      $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
      if (isset($codes['u'])) {
        $pub['URL'] = $codes['u'];
      }
      break;
  }
}

/**
 * Parses the publication XML for a single publication.
 *
 * Description of the XML format:
 * http://www.loc.gov/marc/bibliographic/bdsummary.html
 *
 * @param $pub_xml
 *   A string containing the XML for a single publication.
 *
 * @return
 *   An array containing the details of the publication.  When $pub_xml is
 *   empty only the default 'Publication Type' is set.
 *
 * @ingroup tripal_pub
 */
function tripal_pub_AGL_parse_pubxml($pub_xml) {
  $pub = array();

  // We set the default publication type as a journal article.  The NAL
  // dataset doesn't specify an article type so we have to glean the type
  // from other information (e.g. the series name has 'Proceedings' in it).
  $pub['Publication Type'][0] = 'Journal Article';

  $marc_xml = trim((string) $pub_xml);
  if ($marc_xml === '') {
    return $pub;
  }

  $pub['Title'] = '';
  $pub['Author List'] = array();

  // Read the XML and iterate through it.
  $xml = new XMLReader();
  $xml->xml($marc_xml);
  while ($xml->read()) {
    if ($xml->nodeType != XMLReader::ELEMENT) {
      continue;
    }
    if ($xml->name == 'controlfield') {
      _tripal_pub_AGL_parse_controlfield($pub, $xml->getAttribute('tag'), $xml->readString());
    }
    elseif ($xml->name == 'datafield') {
      _tripal_pub_AGL_parse_datafield($pub, $xml);
    }
  }
  $xml->close();

  // Build the Dbxref.
  if (!empty($pub['Publication Accession']) and !empty($pub['Publication Database'])) {
    $pub['Publication Dbxref'] = $pub['Publication Database'] . ":" . $pub['Publication Accession'];
    unset($pub['Publication Accession']);
    unset($pub['Publication Database']);
  }

  // For Title, Abstract and the authors, convert the HTML entities and remove
  // special unicode chars that are not meant for display.  The authors are
  // cleaned before the 'Authors' string is built so that both agree.
  $pub['Title'] = _tripal_pub_AGL_clean_text($pub['Title']);
  if (array_key_exists('Abstract', $pub)) {
    $pub['Abstract'] = _tripal_pub_AGL_clean_text($pub['Abstract']);
  }
  foreach ($pub['Author List'] as $index => $author) {
    foreach ($author as $key => $value) {
      $pub['Author List'][$index][$key] = _tripal_pub_AGL_clean_text($value);
    }
  }

  // Build the full authors list.
  $names = array();
  foreach ($pub['Author List'] as $author) {
    if (array_key_exists('valid', $author) and $author['valid'] == 'N') {
      // Skip non-valid entries.  A non-valid entry should have a
      // corresponding corrected entry so we can safely skip it.
      continue;
    }
    if (array_key_exists('Collective', $author)) {
      $names[] = $author['Collective'];
    }
    elseif (array_key_exists('Surname', $author)) {
      $name = $author['Surname'];
      if (array_key_exists('First Initials', $author)) {
        $name .= ' ' . $author['First Initials'];
      }
      $names[] = $name;
    }
  }
  $pub['Authors'] = implode(', ', $names);

  // Build the citation.
  $pub['Citation'] = tripal_pub_create_citation($pub);
  $pub['raw'] = $pub_xml;

  return $pub;
}

/**
 * Used for parsing of the XML results to get a set of subfields.
 *
 * Reads forward from the current position of the reader until the end of the
 * enclosing 'datafield' element.  When a code occurs more than once in the
 * data field the last value wins.
 *
 * @param $xml
 *   The XMLReader object to read.
 *
 * @return
 *   An array of values keyed by subfield code.
 *
 * @ingroup tripal_pub
 */
function tripal_pub_remote_search_AGL_get_subfield($xml) {
  $codes = array();

  while ($xml->read()) {
    // When we've reached the end of the datafield element we are done.
    if ($xml->nodeType == XMLReader::END_ELEMENT and $xml->name == 'datafield') {
      break;
    }
    // If on a subfield element then get the code and its text.  readString()
    // does not move the reader, so an empty subfield cannot swallow the node
    // that follows it.
    if ($xml->nodeType == XMLReader::ELEMENT and $xml->name == 'subfield') {
      $codes[$xml->getAttribute('code')] = $xml->readString();
    }
  }

  return $codes;
}

/**
 * Used for parsing of the XML results to get details about an author.
 *
 * @param $xml
 *   The XMLReader object to read.
 * @param $ind1
 *   Indicates how an author record is stored; 0 means given name is first,
 *   1 means surname is first, 3 means a family name is given.
 *
 * @return
 *   An array describing the author.  For a given-name-first record it has
 *   the key 'Given Name'; for a surname-first record it has the keys
 *   'Surname', 'Given Name' and 'First Initials'.  It is empty for family
 *   names and when the record has no 'a' subfield.
 *
 * @ingroup tripal_pub
 */
function tripal_pub_remote_search_AGL_get_author($xml, $ind1) {
  $author = array();
  $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  if (!isset($codes['a'])) {
    return $author;
  }

  // Remove any trailing comma.
  $value = preg_replace('/,$/', '', $codes['a']);

  // The indicator is an attribute value, i.e. a string.  Compare it as a
  // string so that a blank indicator is not mistaken for 0.
  $ind1 = trim((string) $ind1);

  if ($ind1 === '0') {
    // Given name is first.
    $author['Given Name'] = trim($value);
  }
  elseif ($ind1 === '1') {
    // Surname is first: split the parts of the name using a comma.
    $parts = array_map('trim', explode(',', $value));
    $author['Surname'] = array_shift($parts);
    $author['Given Name'] = implode(' ', array_filter($parts, 'strlen'));

    // Use the multibyte aware substring so that an accented first letter is
    // not cut in half.
    $author['First Initials'] = '';
    foreach (preg_split('/\s+/', $author['Given Name'], -1, PREG_SPLIT_NO_EMPTY) as $name) {
      $author['First Initials'] .= mb_substr($name, 0, 1, 'UTF-8');
    }
  }
  // A family name (3) is not handled.

  return $author;
}