123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731 |
- <?php
- /**
- * @file
- * @todo Add file header description
- */
- /**
- * @defgroup fasta_loader FASTA Feature Loader
- * @{
- * Provides fasta loading functionality. Creates features based on their specification in a fasta file.
- * @}
- * @ingroup tripal_feature
- */
- /**
- *
- *
- * @ingroup fasta_loader
- */
- function tripal_feature_fasta_load_form( ) {
- $form['fasta_file']= array(
- '#type' => 'textfield',
- '#title' => t('FASTA File'),
- '#description' => t('Please enter the full system path for the FASTA file, or a path within the Drupal
- installation (e.g. /sites/default/files/xyz.obo). The path must be accessible to the
- server on which this Drupal instance is running.'),
- '#required' => TRUE,
- );
- // get the list of organisms
- $sql = "SELECT * FROM {organism} ORDER BY genus, species";
- $previous_db = tripal_db_set_active('chado'); // use chado database
- $org_rset = db_query($sql);
- tripal_db_set_active($previous_db); // now use drupal database
- $organisms = array();
- $organisms[''] = '';
- while ($organism = db_fetch_object($org_rset)) {
- $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
- }
- $form['organism_id'] = array(
- '#title' => t('Organism'),
- '#type' => t('select'),
- '#description' => t("Choose the organism to which these sequences are associated"),
- '#required' => TRUE,
- '#options' => $organisms,
- );
- $form['seqtype']= array(
- '#type' => 'textfield',
- '#title' => t('Sequence Type'),
- '#required' => TRUE,
- '#description' => t('Please enter the Sequence Ontology term that describes the sequences in the FASTA file.'),
- );
- // get the list of organisms
- $sql = "SELECT L.library_id, L.name, CVT.name as type
- FROM {library} L
- INNER JOIN {cvterm} CVT ON L.type_id = CVT.cvterm_id
- ORDER BY name";
- $previous_db = tripal_db_set_active('chado'); // use chado database
- $lib_rset = db_query($sql);
- tripal_db_set_active($previous_db); // now use drupal database
- $libraries = array();
- $libraries[''] = '';
- while ($library = db_fetch_object($lib_rset)) {
- $libraries[$library->library_id] = "$library->name ($library->type)";
- }
- // $form['library_id'] = array (
- // '#title' => t('Library'),
- // '#type' => t('select'),
- // '#description' => t("Choose the library to which these sequences are associated "),
- // '#required' => FALSE,
- // '#options' => $libraries,
- // '#weight' => 5,
- // );
- $form['method']= array(
- '#type' => 'radios',
- '#title' => 'Method',
- '#required' => TRUE,
- '#options' => array(
- t('Insert only'),
- t('Update only'),
- t('Insert and update'),
- ),
- '#description' => t('Select how features in the FASTA file are handled.
- Select "Insert only" to insert the new features. If a feature already
- exists with the same name or unique name and type then it is skipped.
- Select "Update only" to only update featues that already exist in the
- database. Select "Insert and Update" to insert features that do
- not exist and upate those that do.'),
- '#default_value' => 2,
- );
- $form['match_type']= array(
- '#type' => 'radios',
- '#title' => 'Name Match Type',
- '#required' => TRUE,
- '#options' => array(
- t('Name'),
- t('Unique name'),
- ),
- '#description' => t('Feature data is stored in Chado with both a human-readable
- name and a unique name. If the features in your FASTA file are identified using
- a human-readable name then select the "Name" button. If your features are
- identified using the unique name then select the "Unique name" button. If you
- loaded your features first using the GFF loader then the unique name of each
- features were indicated by the "ID=" attribute and the name by the "Name=" attribute.
- By default, the FASTA loader will use the first word (character string
- before the first space) as the name for your feature. If
- this does not uniquely identify your feature consider specifying a regular expression in the advanced section below.
- Additionally, you may import both a name and a unique name for each sequence using the advanced options.
- When updating a sequence, the value selected here will be used to identify the sequence in the
- database in combination with any regular expression provided below.'),
- '#default_value' => 1,
- );
- $form['analysis'] = array(
- '#type' => 'fieldset',
- '#title' => t('Analysis Used to Derive Features'),
- '#collapsed' => TRUE
- );
- $form['analysis']['desc'] = array(
- '#type' => 'markup',
- '#value' => t("Why specify an analysis for a data load? All data comes
- from some place, even if downloaded from Genbank. By specifying
- analysis details for all data uploads, it allows an end user to reproduce the
- data set, but at least indicates the source of the data."),
- );
- // get the list of organisms
- $sql = "SELECT * FROM {analysis} ORDER BY name";
- $previous_db = tripal_db_set_active('chado'); // use chado database
- $org_rset = db_query($sql);
- tripal_db_set_active($previous_db); // now use drupal database
- $analyses = array();
- $analyses[''] = '';
- while ($analysis = db_fetch_object($org_rset)) {
- $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
- }
- $form['analysis']['analysis_id'] = array(
- '#title' => t('Analysis'),
- '#type' => t('select'),
- '#description' => t("Choose the analysis to which these features are associated"),
- '#required' => TRUE,
- '#options' => $analyses,
- );
- // Advanced Options
- $form['advanced'] = array(
- '#type' => 'fieldset',
- '#title' => t('Advanced Options'),
- '#collapsible' => TRUE,
- '#collapsed' => TRUE
- );
- $form['advanced']['re_help']= array(
- '#type' => 'item',
- '#value' => t('A regular expression is an advanced method for extracting information from a string of text.
- Your FASTA file may contain both a human-readable name and a unique name for each sequence.
- If you want to import
- both the name and unique name for all sequences, then you must provide regular expressions
- so that the loader knows how to separate them.
- Otherwise the name and uniquename will be the same.
- By default, this loader will use the first word in the definition
- lines of the FASTA file
- as the name or unique name of the feature.'),
- );
- $form['advanced']['re_name']= array(
- '#type' => 'textfield',
- '#title' => t('Regular expression for the name'),
- '#required' => FALSE,
- '#description' => t('Enter the regular expression that will extract the
- feature name from the FASTA definition line. For example, for a
- defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
- the regular expression for the name would be, "^(.*?)\|.*$".'),
- );
- $form['advanced']['re_uname']= array(
- '#type' => 'textfield',
- '#title' => t('Regular expression for the unique name'),
- '#required' => FALSE,
- '#description' => t('Enter the regular expression that will extract the
- feature name from the FASTA definition line. For example, for a
- defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
- the regular expression for the unique name would be "^.*?\|(.*)$").'),
- );
- // Advanced database cross-reference optoins
- $form['advanced']['db'] = array(
- '#type' => 'fieldset',
- '#title' => t('External Database Reference'),
- '#weight' => 6,
- '#collapsed' => TRUE
- );
- $form['advanced']['db']['re_accession']= array(
- '#type' => 'textfield',
- '#title' => t('Regular expression for the accession'),
- '#required' => FALSE,
- '#description' => t('Enter the regular expression that will extract the accession for the external database for each feature from the FASTA definition line.'),
- '#weight' => 2
- );
- // get the list of databases
- $sql = "SELECT * FROM {db} ORDER BY name";
- $previous_db = tripal_db_set_active('chado'); // use chado database
- $db_rset = db_query($sql);
- tripal_db_set_active($previous_db); // now use drupal database
- $dbs = array();
- $dbs[''] = '';
- while ($db = db_fetch_object($db_rset)) {
- $dbs[$db->db_id] = "$db->name";
- }
- $form['advanced']['db']['db_id'] = array(
- '#title' => t('External Database'),
- '#type' => t('select'),
- '#description' => t("Plese choose an external database for which these sequences have a cross reference."),
- '#required' => FALSE,
- '#options' => $dbs,
- '#weight' => 1,
- );
- $form['advanced']['relationship'] = array(
- '#type' => 'fieldset',
- '#title' => t('Relationships'),
- '#weight' => 6,
- '#collapsed' => TRUE
- );
- $rels = array();
- $rels[''] = '';
- $rels['part_of'] = 'part of';
- $rels['derives_from'] = 'produced by';
- // Advanced references options
- $form['advanced']['relationship']['rel_type']= array(
- '#title' => t('Relationship Type'),
- '#type' => t('select'),
- '#description' => t("Use this option to create associations, or relationships between the
- features of this FASTA file and existing features in the database. For
- example, to associate a FASTA file of peptides to existing genes or transcript sequence,
- select the type 'produced by'. For a CDS sequences select the type 'part of'"),
- '#required' => FALSE,
- '#options' => $rels,
- '#weight' => 5,
- );
- $form['advanced']['relationship']['re_subject']= array(
- '#type' => 'textfield',
- '#title' => t('Regular expression for the parent'),
- '#required' => FALSE,
- '#description' => t('Enter the regular expression that will extract the unique
- name needed to identify the existing sequence for which the
- relationship type selected above will apply.'),
- '#weight' => 6
- );
- $form['advanced']['relationship']['parent_type']= array(
- '#type' => 'textfield',
- '#title' => t('Parent Type'),
- '#required' => FALSE,
- '#description' => t('Please enter the Sequence Ontology term for the parent. For example
- if the FASTA file being loaded is a set of proteins that are
- products of genes, then use the SO term \'gene\' or \'transcript\' or equivalent. However,
- this type must match the type for already loaded features.'),
- '#weight' => 7
- );
- $form['button'] = array(
- '#type' => 'submit',
- '#value' => t('Import FASTA file'),
- '#weight' => 10,
- );
- return $form;
- }
- /**
- *
- *
- * @ingroup fasta_loader
- */
- function tripal_feature_fasta_load_form_validate($form, &$form_state) {
- $fasta_file = trim($form_state['values']['fasta_file']);
- $organism_id = $form_state['values']['organism_id'];
- $type = trim($form_state['values']['seqtype']);
- $method = trim($form_state['values']['method']);
- $match_type = trim($form_state['values']['match_type']);
- $library_id = $form_state['values']['library_id'];
- $re_name = trim($form_state['values']['re_name']);
- $re_uname = trim($form_state['values']['re_uname']);
- $re_accession = trim($form_state['values']['re_accession']);
- $db_id = $form_state['values']['db_id'];
- $rel_type = $form_state['values']['rel_type'];
- $re_subject = trim($form_state['values']['re_subject']);
- $parent_type = trim($form_state['values']['parent_type']);
- if ($method == 0) {
- $method = 'Insert only';
- }
- if ($method == 1) {
- $method = 'Update only';
- }
- if ($method == 2) {
- $method = 'Insert and update';
- }
- if ($match_type == 0) {
- $match_type = 'Name';
- }
- if ($match_type == 1) {
- $match_type = 'Unique name';
- }
- if ($re_name and !$re_uname and strcmp($match_type, 'Unique name')==0) {
- form_set_error('re_uname', t("You must provide a regular expression to identify the sequence unique name"));
- }
- if (!$re_name and $re_uname and strcmp($match_type, 'Name')==0) {
- form_set_error('re_name', t("You must provide a regular expression to identify the sequence name"));
- }
- // check to see if the file is located local to Drupal
- $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $fasta_file;
- if (!file_exists($dfile)) {
- // if not local to Drupal, the file must be someplace else, just use
- // the full path provided
- $dfile = $fasta_file;
- }
- if (!file_exists($dfile)) {
- form_set_error('fasta_file', t("Cannot find the file on the system. Check that the file exists or that the web server has permissions to read the file."));
- }
- // make sure if a relationship is specified that all fields are provided.
- if (($rel_type or $parent_type) and !$re_subject) {
- form_set_error('re_subject', t("Please provide a regular expression for the parent"));
- }
- if (($rel_type or $re_subject) and !$parent_type) {
- form_set_error('parent_type', t("Please provide a SO term for the parent"));
- }
- if (($parent_type or $re_subject) and !$rel_type) {
- form_set_error('rel_type', t("Please select a relationship type"));
- }
- // make sure if a database is specified that all fields are provided
- if ($db_id and !$re_accession) {
- form_set_error('re_accession', t("Please provide a regular expression for the accession"));
- }
- if ($re_accession and !$db_id) {
- form_set_error('db_id', t("Please select a database"));
- }
- // check to make sure the types exists
- $cvtermsql = "SELECT CVT.cvterm_id
- FROM {cvterm} CVT
- INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
- LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
- WHERE cv.name = '%s' and (CVT.name = '%s' or CVTS.synonym = '%s')";
- $cvterm = db_fetch_object(db_query($cvtermsql, 'sequence', $type, $type));
- if (!$cvterm) {
- form_set_error('type', t("The Sequence Ontology (SO) term selected for the sequence type is not available in the database. Please check spelling or select another."));
- }
- if ($rel_type) {
- $cvterm = db_fetch_object(db_query($cvtermsql, 'sequence', $parent_type, $parent_type));
- if (!$cvterm) {
- form_set_error('parent_type', t("The Sequence Ontology (SO) term selected for the parent relationship is not available in the database. Please check spelling or select another."));
- }
- }
- // check to make sure the 'relationship' and 'sequence' ontologies are loaded
- $form_state['storage']['dfile'] = $dfile;
- }
- /**
- *
- *
- * @ingroup fasta_loader
- */
- function tripal_feature_fasta_load_form_submit($form, &$form_state) {
- global $user;
- $dfile = $form_state['storage']['dfile'];
- $organism_id = $form_state['values']['organism_id'];
- $type = trim($form_state['values']['seqtype']);
- $method = trim($form_state['values']['method']);
- $match_type = trim($form_state['values']['match_type']);
- $library_id = $form_state['values']['library_id'];
- $re_name = trim($form_state['values']['re_name']);
- $re_uname = trim($form_state['values']['re_uname']);
- $re_accession = trim($form_state['values']['re_accession']);
- $db_id = $form_state['values']['db_id'];
- $rel_type = $form_state['values']['rel_type'];
- $re_subject = trim($form_state['values']['re_subject']);
- $parent_type = trim($form_state['values']['parent_type']);
- $analysis_id = $form_state['values']['analysis_id'];
- if ($method == 0) {
- $method = 'Insert only';
- }
- if ($method == 1) {
- $method = 'Update only';
- }
- if ($method == 2) {
- $method = 'Insert and update';
- }
- if ($match_type == 0) {
- $match_type = 'Name';
- }
- if ($match_type == 1) {
- $match_type = 'Unique name';
- }
- $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
- $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
- $user->uid, $analysis_id, $match_type);
- tripal_add_job("Import FASTA file: $dfile", 'tripal_feature',
- 'tripal_feature_load_fasta', $args, $user->uid);
- }
- /**
- *
- *
- * @ingroup fasta_loader
- */
- function tripal_feature_load_fasta($dfile, $organism_id, $type,
- $library_id, $re_name, $re_uname, $re_accession, $db_id, $rel_type,
- $re_subject, $parent_type, $method, $uid, $analysis_id,
- $match_type, $job = NULL) {
- print "Opening FASTA file $dfile\n";
- $lines = file($dfile, FILE_SKIP_EMPTY_LINES);
- $i = 0;
- $name = '';
- $uname = '';
- $residues = '';
- $num_lines = sizeof($lines);
- $interval = intval($num_lines * 0.01);
- if ($interval == 0) {
- $interval = 1;
- }
- foreach ($lines as $line_num => $line) {
- $i++; // update the line count
- // update the job status every 1% features
- if ($job and $i % $interval == 0) {
- tripal_job_set_progress($job, intval(($i/$num_lines)*100));
- }
- // if we encounter a definition line then get the name, uniquename,
- // accession and relationship subject from the definition line
- if (preg_match('/^>/', $line)) {
- // if we have a feature name then we are starting a new sequence
- // so let's handle the previous one before moving on
- if ($name or $uname) {
- tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
- $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $type,
- $source, $residues, $method, $re_name, $match_type);
- $residues = '';
- $name = '';
- $uname = '';
- }
- $line = preg_replace("/^>/", '', $line);
- // get the feature name
- if ($re_name) {
- if (!preg_match("/$re_name/", $line, $matches)) {
- print "WARNING: Regular expression for the feature name finds nothing\n";
- }
- $name = trim($matches[1]);
- }
- else {
- // if the match_type is name and no regular expression was provided
- // then use the first word as the name, otherwise we don't set the name
- if (strcmp($match_type, 'Name')==0) {
- preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches);
- $name = trim($matches[1]);
- }
- }
- // get the feature unique name
- if ($re_uname) {
- if (!preg_match("/$re_uname/", $line, $matches)) {
- print "WARNING: Regular expression for the feature unique name finds nothing\n";
- }
- $uname = trim($matches[1]);
- }
- else {
- // if the match_type is name and no regular expression was provided
- // then use the first word as the name, otherwise, we don't set the unqiuename
- if (strcmp($match_type, 'Unique name')==0) {
- preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches);
- $uname = trim($matches[1]);
- }
- }
- // get the accession
- preg_match("/$re_accession/", $line, $matches);
- $accession = trim($matches[1]);
- // get the relationship subject
- preg_match("/$re_subject/", $line, $matches);
- $subject = trim($matches[1]);
- }
- else {
- $residues .= trim($line);
- }
- }
- // now load the last sequence in the file
- tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
- $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $type,
- $source, $residues, $method, $re_name, $match_type);
- return '';
- }
- /**
- *
- *
- * @ingroup fasta_loader
- */
- function tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id, $accession,
- $parent, $rel_type, $parent_type, $analysis_id, $organism_id, $type,
- $source, $residues, $method, $re_name, $match_type) {
- $previous_db = tripal_db_set_active('chado');
- // first get the type for this sequence
- $cvtermsql = "SELECT CVT.cvterm_id
- FROM {cvterm} CVT
- INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
- LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
- WHERE cv.name = '%s' and (CVT.name = '%s' or CVTS.synonym = '%s')";
- $cvterm = db_fetch_object(db_query($cvtermsql, 'sequence', $type, $type));
- if (!$cvterm) {
- print "ERROR: cannot find the term type: '$type'\n";
- return 0;
- }
- // check to see if this feature already exists
- if (strcmp($match_type, 'Name')==0) {
- $cnt_sql = "SELECT count(*) as cnt FROM {feature}
- WHERE organism_id = %d and name = '%s' and type_id = %d";
- $cnt = db_fetch_object(db_query($cnt_sql, $organism_id, $name, $cvterm->cvterm_id));
- if ($cnt->cnt > 1) {
- print "ERROR: multiple features exist with the name '$name' of type '$type' for the organism. skipping\n";
- return 0;
- }
- else {
- $feature_sql = "SELECT * FROM {feature}
- WHERE organism_id = %d and name = '%s' and type_id = %d";
- $feature = db_fetch_object(db_query($feature_sql, $organism_id, $name, $cvterm->cvterm_id));
- }
- }
- if (strcmp($match_type, 'Unique name')==0) {
- $feature_sql = "SELECT * FROM {feature}
- WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
- $feature = db_fetch_object(db_query($feature_sql, $organism_id, $uname, $cvterm->cvterm_id));
- }
- if (!$feature and (strcmp($method, 'Insert only')==0 or strcmp($method, 'Insert and update')==0)) {
- // if we have a unique name but not a name then set them to be teh same
- // and vice versa
- if (!$uname) {
- $uname = $name;
- }
- elseif (!$name) {
- $name = $uname;
- }
- // now insert the feature
- $sql = "INSERT INTO {feature}
- (organism_id, name, uniquename, residues, seqlen,
- md5checksum,type_id,is_analysis,is_obsolete)
- VALUES(%d,'%s','%s','%s',%d, '%s', %d, %s, %s)";
- $result = db_query($sql, $organism_id, $name, $uname, $residues, drupal_strlen($residues),
- md5($residues), $cvterm->cvterm_id, 'false', 'false');
- if (!$result) {
- print "ERROR: failed to insert feature '$name ($uname)'\n";
- return 0;
- }
- else {
- print "Inserted feature $name ($uname)\n";
- }
- $feature = db_fetch_object(db_query($feature_sql, $organism_id, $uname, $cvterm->cvterm_id));
- }
- if (!$feature and (strcmp($method, 'Update only')==0 or drupal_strcmp($method, 'Insert and update')==0)) {
- print "WARNING: failed to find feature '$name' ('$uname') while matching on " . drupal_strtolower($match_type) . ". Skipping\n";
- return 0;
- }
- if ($feature and (strcmp($method, 'Update only')==0 or strcmp($method, 'Insert and update')==0)) {
- if (strcmp($method, 'Update only')==0 or strcmp($method, 'Insert and update')==0) {
- if (strcmp($match_type, 'Name')==0) {
- // if we're matching on the name but do not have a new unique name then we
- // don't want to update the uniquename. If we do have a uniquename then we
- // should update it. We only get a uniquename if there was a regular expression
- // provided for pulling it out
- if ($uname) {
- $sql = "UPDATE {feature}
- SET uniquename = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
- WHERE organism_id = %d and name = '%s' and type_id = %d";
- $result = db_query($sql, $uname, $residues, drupal_strlen($residues), md5($residues), $organism_id, $name, $cvterm->cvterm_id);
- }
- else {
- $sql = "UPDATE {feature}
- SET residues = '%s', seqlen = '%s', md5checksum = '%s'
- WHERE organism_id = %d and name = '%s' and type_id = %d";
- $result = db_query($sql, $residues, durpal_strlen($residues), md5($residues), $organism_id, $name, $cvterm->cvterm_id);
- }
- }
- else {
- // if we're matching on the unique name but do not have a new name then we
- // don't want to update the name. If we do have a name then we
- // should update it. We only get a name if there was a regular expression
- // provided for pulling it out
- if ($name) {
- $sql = "UPDATE {feature}
- SET name = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
- WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
- $result = db_query($sql, $name, $residues, drupal_strlen($residues), md5($residues), $organism_id, $uname, $cvterm->cvterm_id);
- }
- else {
- $sql = "UPDATE {feature}
- SET residues = '%s', seqlen = '%s', md5checksum = '%s'
- WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
- $result = db_query($sql, $residues, drupal_strlen($residues), md5($residues), $organism_id, $uname, $cvterm->cvterm_id);
- }
- }
- if (!$result) {
- print "ERROR: failed to update feature '$name ($uname)'\n";
- return 0;
- }
- else {
- print "Updated feature $name ($uname)\n";
- }
- }
- else {
- print "WARNING: feature already exists: '$name' ('$uname'). Skipping\n";
- }
- }
- // now get the feature
- $feature = db_fetch_object(db_query($feature_sql, $organism_id, $uname, $cvterm->cvterm_id));
- if (!$feature) {
- print "Something bad has happened: $organism_id, $uname, $cvterm->cvterm_id\n";
- return 0;
- }
- // add in the analysis link
- if ($analysis_id) {
- // @coder-ignore: non-drupal table thus table prefixing doesn't apply
- $analysis_link_sql = 'SELECT * FROM analysisfeature WHERE analysis_id=%d AND feature_id=%d';
- $analysis_link = db_fetch_object(db_query($analysis_link_sql, $analysis_id, $feature->feature_id));
- if (!$analysis_link) {
- // @coder-ignore: non-drupal table thus table prefixing doesn't apply
- $sql = "INSERT INTO analysisfeature (analysis_id, feature_id) VALUES (%d, %d)";
- $result = db_query($sql, $analysis_id, $feature->feature_id);
- if (!$result) {
- print "WARNING: could not add link between analysis: " . $analysis_id . " and feature: " . $feature->uniquename . "\n";
- }
- $analysis_link = db_fetch_object(db_query($analysis_link_sql, $analysis_id, $feature->feature_id));
- }
- }
- // now add the database cross reference
- if ($db_id) {
- // check to see if this accession reference exists, if not add it
- // @coder-ignore: non-drupal table thus table prefixing doesn't apply
- $dbxrefsql = "SELECT * FROM dbxref WHERE db_id = %d and accession = '%s'";
- $dbxref = db_fetch_object(db_query($dbxrefsql, $db_id, $accession));
- if (!$dbxref) {
- // @coder-ignore: non-drupal table thus table prefixing doesn't apply
- $sql = "INSERT INTO dbxref (db_id,accession) VALUES (%d, '%s')";
- $result = db_query($sql, $db_id, $accession);
- if (!$result) {
- print "WARNING: could not add external database acession: '$name accession: $accession'\n";
- }
- $dbxref = db_fetch_object(db_query($dbxrefsql, $db_id, $accession));
- }
- // check to see if the feature dbxref record exists if not, then add it
- $fdbxrefsql = "SELECT * FROM {feature_dbxref} WHERE feature_id = %d and dbxref_id = %d";
- $fdbxref = db_fetch_object(db_query($fdbxrefsql, $feature->feature_id, $dbxref->dbxref_id));
- if (!$fdbxref) {
- $sql = "INSERT INTO {feature_dbxref} (feature_id,dbxref_id) VALUES (%d, %d)";
- $result = db_query($sql, $feature->feature_id, $dbxref->dbxref_id);
- if (!$result) {
- print "WARNING: could not associate database cross reference with feature: '$name accession: $accession'\n";
- }
- else {
- print "Added database crossreference $name ($uname) -> $accession\n";
- }
- }
- }
- // now add in the relationship if one exists. First, get the parent type for the relationship
- // then get the parent feature
- if ($rel_type) {
- $parentcvterm = db_fetch_object(db_query($cvtermsql, 'sequence', $parent_type, $parent_type));
- $relcvterm = db_fetch_object(db_query($cvtermsql, 'relationship', $rel_type, $rel_type));
- $parent_feature = db_fetch_object(db_query($feature_sql, $organism_id, $parent, $parentcvterm->cvterm_id));
- if ($parent_feature) {
- // check to see if the relationship already exists
- $sql = "SELECT * FROM {feature_relationship} WHERE subject_id = %d and object_id = %d and type_id = %d";
- $rel = db_fetch_object(db_query($sql, $feature->feature_id, $parent_feature->feature_id, $relcvterm->cvterm_id));
- if ($rel) {
- print "WARNING: relationship already exists, skipping '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
- }
- else {
- $sql = "INSERT INTO {feature_relationship} (subject_id,object_id,type_id)
- VALUES (%d,%d,%d)";
- $result = db_query($sql, $feature->feature_id, $parent_feature->feature_id, $relcvterm->cvterm_id);
- if (!$result) {
- print "WARNING: failed to insert feature relationship '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
- }
- else {
- print "Inserted relationship relationship: '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
- }
- }
- }
- else {
- print "WARNING: cannot establish relationship '$uname' ($type) $rel_type '$parent' ($parent_type): Cannot find the parent\n";
- }
- }
- tripal_db_set_active($previous_db);
- }
|