gff_loader.php 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731
  1. <?php
  2. // TODO: The rank column on the feature_relationship table needs to be used to
  3. // make sure the ordering of CDS (exons) is correct.
  4. // The entries in the GFF file are not in order so the order of the relationships
  5. // is not put in correctly.
  6. /*************************************************************************
  7. *
  8. */
  9. function tripal_core_gff3_load_form (){
  10. $form['gff_file']= array(
  11. '#type' => 'textfield',
  12. '#title' => t('GFF3 File'),
  13. '#description' => t('Please enter the full system path for the GFF file, or a path within the Drupal
  14. installation (e.g. /sites/default/files/xyz.obo). The path must be accessible to the
  15. server on which this Drupal instance is running.'),
  16. '#required' => TRUE,
  17. '#weight' => 1
  18. );
  19. // get the list of organisms
  20. $sql = "SELECT * FROM {organism} ORDER BY genus, species";
  21. $previous_db = tripal_db_set_active('chado'); // use chado database
  22. $org_rset = db_query($sql);
  23. tripal_db_set_active($previous_db); // now use drupal database
  24. $organisms = array();
  25. $organisms[''] = '';
  26. while($organism = db_fetch_object($org_rset)){
  27. $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  28. }
  29. $form['organism_id'] = array (
  30. '#title' => t('Organism'),
  31. '#type' => t('select'),
  32. '#description' => t("Choose the organism to which these sequences are associated "),
  33. '#required' => TRUE,
  34. '#options' => $organisms,
  35. );
  36. $form['import_options'] = array(
  37. '#type' => 'fieldset',
  38. '#title' => t('Import Options'),
  39. '#weight'=> 6,
  40. '#collapsed' => TRUE
  41. );
  42. $form['import_options']['add_only']= array(
  43. '#type' => 'checkbox',
  44. '#title' => t('Import only new features'),
  45. '#required' => FALSE,
  46. '#description' => t('The job will skip features in the GFF file that already
  47. exist in the database and import only new features.'),
  48. '#weight' => 2
  49. );
  50. $form['import_options']['update']= array(
  51. '#type' => 'checkbox',
  52. '#title' => t('Import all and update'),
  53. '#required' => FALSE,
  54. '#default_value' => 'checked',
  55. '#description' => t('Existing features will be updated and new features will be added. Attributes
  56. for a feature that are not present in the GFF but which are present in the
  57. database will not be altered.'),
  58. '#weight' => 3
  59. );
  60. $form['import_options']['refresh']= array(
  61. '#type' => 'checkbox',
  62. '#title' => t('Import all and replace'),
  63. '#required' => FALSE,
  64. '#description' => t('Existing features will be updated and feature properties not
  65. present in the GFF file will be removed.'),
  66. '#weight' => 4
  67. );
  68. $form['import_options']['remove']= array(
  69. '#type' => 'checkbox',
  70. '#title' => t('Delete features'),
  71. '#required' => FALSE,
  72. '#description' => t('Features present in the GFF file that exist in the database
  73. will be removed rather than imported'),
  74. '#weight' => 5
  75. );
  76. $form['button'] = array(
  77. '#type' => 'submit',
  78. '#value' => t('Import GFF3 file'),
  79. '#weight' => 10,
  80. );
  81. return $form;
  82. }
  83. /*************************************************************************
  84. *
  85. */
  86. function tripal_core_gff3_load_form_validate ($form, &$form_state){
  87. $gff_file = $form_state['values']['gff_file'];
  88. $organism_id = $form_state['values']['organism_id'];
  89. $add_only = $form_state['values']['add_only'];
  90. $update = $form_state['values']['update'];
  91. $refresh = $form_state['values']['refresh'];
  92. $remove = $form_state['values']['remove'];
  93. // check to see if the file is located local to Drupal
  94. $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $gff_file;
  95. if(!file_exists($dfile)){
  96. // if not local to Drupal, the file must be someplace else, just use
  97. // the full path provided
  98. $dfile = $gff_file;
  99. }
  100. if(!file_exists($dfile)){
  101. form_set_error('gff_file',t("Cannot find the file on the system. Check that the file exists or that the web server has permissions to read the file."));
  102. }
  103. if (($add_only and ($update or $refresh or $remove)) or
  104. ($update and ($add_only or $refresh or $remove)) or
  105. ($refresh and ($update or $add_only or $remove)) or
  106. ($remove and ($update or $refresh or $add_only))){
  107. form_set_error('add_only',t("Please select only one checkbox from the import options section"));
  108. }
  109. }
  110. /*************************************************************************
  111. *
  112. */
  113. function tripal_core_gff3_load_form_submit ($form, &$form_state){
  114. global $user;
  115. $gff_file = $form_state['values']['gff_file'];
  116. $organism_id = $form_state['values']['organism_id'];
  117. $add_only = $form_state['values']['add_only'];
  118. $update = $form_state['values']['update'];
  119. $refresh = $form_state['values']['refresh'];
  120. $remove = $form_state['values']['remove'];
  121. $args = array($gff_file,$organism_id,$add_only,$update,$refresh,$remove);
  122. $type = '';
  123. if($add_only){
  124. $type = 'import only new features';
  125. }
  126. if($update){
  127. $type = 'import all and update';
  128. }
  129. if($refresh){
  130. $type = 'import all and replace';
  131. }
  132. if($remove){
  133. $type = 'delete features';
  134. }
  135. tripal_add_job("$type GFF3 file $dfile",'tripal_core',
  136. 'tripal_core_load_gff3',$args,$user->uid);
  137. return '';
  138. }
  139. /*************************************************************************
  140. *
  141. */
  142. function tripal_core_load_gff3($gff_file, $organism_id,$add_only =0,
  143. $update = 0, $refresh = 0, $remove = 0, $job = NULL)
  144. {
  145. // this array is used to cache all of the features in the GFF file and
  146. // used to lookup parent and target relationships
  147. $gff_features = array();
  148. // check to see if the file is located local to Drupal
  149. $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $gff_file;
  150. if(!file_exists($dfile)){
  151. // if not local to Drupal, the file must be someplace else, just use
  152. // the full path provided
  153. $dfile = $gff_file;
  154. }
  155. if(!file_exists($dfile)){
  156. print "ERROR: cannot find the file: $dfile\n";
  157. return 0;
  158. }
  159. $previous_db = tripal_db_set_active('chado');
  160. print "Opening $gff_file\n";
  161. $lines = file($dfile,FILE_SKIP_EMPTY_LINES);
  162. $i = 0;
  163. // get the controlled vocaubulary that we'll be using. The
  164. // default is the 'sequence' ontology
  165. $sql = "SELECT * FROM cv WHERE name = '%s'";
  166. $cv = db_fetch_object(db_query($sql,'sequence'));
  167. if(!$cv){
  168. print "ERROR: cannot find the 'sequence' ontology\n";
  169. return '';
  170. }
  171. // get the organism for which this GFF3 file belongs
  172. $sql = "SELECT * FROM organism WHERE organism_id = %d";
  173. $organism = db_fetch_object(db_query($sql,$organism_id));
  174. $num_lines = sizeof($lines);
  175. $interval = intval($num_lines * 0.01);
  176. foreach ($lines as $line_num => $line) {
  177. $i++; // update the line count
  178. // update the job status every 1% features
  179. if($job and $i % $interval == 0){
  180. tripal_job_set_progress($job,intval(($i/$num_lines)*100));
  181. }
  182. // skip comments
  183. if(preg_match('/^#/',$line)){
  184. continue;
  185. }
  186. // skip empty lines
  187. if(preg_match('/^\s*$/',$line)){
  188. continue;
  189. }
  190. // TODO: remove URL encoding
  191. $cols = explode("\t",$line);
  192. if(sizeof($cols) != 9){
  193. print "ERROR: improper number of columns on line $i\n";
  194. return '';
  195. }
  196. // get the column values
  197. $landmark = $cols[0];
  198. $source = $cols[1];
  199. $type = $cols[2];
  200. $start = $cols[3];
  201. $end = $cols[4];
  202. $score = $cols[5];
  203. $strand = $cols[6];
  204. $phase = $cols[7];
  205. $attrs = explode(";",$cols[8]); // split by a semi-colon
  206. // ready the start and stop for chado
  207. $fmin = $start;
  208. $fmax = $end;
  209. if($end < $start){
  210. $fmin = $end;
  211. $fmax = $start;
  212. }
  213. // format the strand for chado
  214. if(strcmp($strand,'.')==0){
  215. $strand = 0;
  216. }
  217. elseif(strcmp($strand,'+')==0){
  218. $strand = 1;
  219. }
  220. elseif(strcmp($strand,'-')==0){
  221. $strand = -1;
  222. }
  223. if(strcmp($phase,'.')==0){
  224. $phase = '';
  225. }
  226. // get the type record
  227. $cvtermsql = "SELECT CVT.cvterm_id, CVT.cv_id, CVT.name, CVT.definition,
  228. CVT.dbxref_id, CVT.is_obsolete, CVT.is_relationshiptype
  229. FROM {cvterm} CVT
  230. INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
  231. LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
  232. WHERE CV.cv_id = %d and (CVT.name = '%s' or CVTS.synonym = '%s')";
  233. $cvterm = db_fetch_object(db_query($cvtermsql,$cv->cv_id,$type,$type));
  234. if(!$cvterm){
  235. print "ERROR: cannot find ontology term '$type' on line $i.\n";
  236. return '';
  237. }
  238. // break apart each of the attributes
  239. $tags = array();
  240. $attr_name = '';
  241. $attr_uniquename = '';
  242. $attr_residue_info = '';
  243. $attr_locgroup = 0;
  244. $attr_fmin_partial = 'f';
  245. $attr_fmax_partial = 'f';
  246. $attr_is_obsolete = 'f';
  247. $attr_is_analysis = 'f';
  248. $attr_others = '';
  249. $residues = '';
  250. $generate_name = 0;
  251. foreach($attrs as $attr){
  252. $attr = rtrim($attr);
  253. $attr = ltrim($attr);
  254. if(strcmp($attr,'')==0){
  255. continue;
  256. }
  257. if(!preg_match('/^[^\=]+\=[^\=]+$/',$attr)){
  258. print "ERROR: attribute is not correctly formatted on line $i: $attr\n";
  259. return '';
  260. }
  261. // break apart each tag
  262. $tag = explode("=",$attr); // split by equals sign
  263. // multiple instances of an attribute are separated by commas
  264. $tags[$tag[0]] = explode(",",$tag[1]); // split by comma
  265. if(strcmp($tag[0],'ID')==0){
  266. $attr_uniquename = $tag[1];
  267. }
  268. elseif(strcmp($tag[0],'Name')==0){
  269. $attr_name = $tag[1];
  270. }
  271. // get the list of other attributes other than those reserved ones.
  272. elseif(strcmp($tag[0],'Alias')!=0 and strcmp($tag[0],'Parent')!=0 and
  273. strcmp($tag[0],'Target')!=0 and strcmp($tag[0],'Gap')!=0 and
  274. strcmp($tag[0],'Derives_from')!=0 and strcmp($tag[0],'Note')!=0 and
  275. strcmp($tag[0],'Dbxref')!=0 and strcmp($tag[0],'Ontology_term')!=0 and
  276. strcmp($tag[0],'Is_circular')!=0){
  277. $attr_others[$tag[0]] = $tag[1];
  278. }
  279. }
  280. // if neither name nor uniquename are provided then generate one
  281. if(!$attr_uniquename and !$attr_name){
  282. $generate_name = 1;
  283. }
  284. // if a name is not specified then use the unique name
  285. if(strcmp($attr_name,'')==0){
  286. $attr_name = $attr_uniquename;
  287. }
  288. // if an ID attribute is not specified then use the attribute name and
  289. // hope for the best
  290. if(!$attr_uniquename){
  291. $attr_uniquename = $attr_name;
  292. }
  293. // make sure the landmark sequence exists in the database. We don't
  294. // know the type of the landmark so we'll hope that it's unique across
  295. // all types. If not we'll error out. This test is only necessary if
  296. // if the landmark and the uniquename are different. If they are the same
  297. // then this is the information for the landmark
  298. if(strcmp($landmark,$attr_uniquename)!=0){
  299. $feature_sql = "SELECT count(*) as num_landmarks
  300. FROM {feature}
  301. WHERE organism_id = %d and uniquename = '%s'";
  302. $count = db_fetch_object(db_query($feature_sql,$organism_id,$landmark));
  303. if(!$count or $count->num_landmarks == 0){
  304. print "ERROR: the landmark '$landmark' cannot be found for this organism. ".
  305. "Please add the landmark and then retry the import of this GFF3 ".
  306. "file.\n";
  307. return '';
  308. }
  309. if($count->num_landmarks > 1){
  310. print "ERROR: the landmark '$landmark' is not unique for this organism. ".
  311. "The features cannot be associated.\n";
  312. return '';
  313. }
  314. }
  315. // if the option is to remove or refresh then we want to remove
  316. // the feature from the database.
  317. if(!$generate_name and ($remove or $refresh)){
  318. print "Removing feature '$attr_uniquename'\n";
  319. $sql = "DELETE FROM {feature}
  320. WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
  321. $result = db_query($sql,$organism->organism_id,$attr_uniquename,$cvterm->cvterm_id);
  322. if(!$result){
  323. print "ERROR: cannot delete feature $attr_uniquename\n";
  324. }
  325. $feature = 0;
  326. }
  327. // add or update the feature and all properties
  328. if($update or $refresh or $add_only){
  329. // add/update the feature
  330. $feature = tripal_core_load_gff3_feature($organism,$cvterm,
  331. $attr_uniquename,$attr_name,$residues,$attr_is_analysis,
  332. $attr_is_obsolete, $add_only,$generate_name);
  333. // store all of the features so far use later by parent and target
  334. // relationships
  335. $gff_features[$feature->uniquename]['type'] = $type;
  336. if($feature){
  337. if($generate_name){
  338. $attr_uniquename = $feature->uniquename;
  339. }
  340. // add/update the featureloc if the landmark and the ID are not the same
  341. // if they are the same then this entry in the GFF is probably a landmark identifier
  342. if(strcmp($landmark,$attr_uniquename)!=0){
  343. tripal_core_load_gff3_featureloc($feature,$organism,
  344. $landmark,$fmin,$fmax,$strand,$phase,$attr_fmin_partial,
  345. $attr_fmax_partial,$attr_residue_info,$attr_locgroup);
  346. }
  347. // add any aliases for this feature
  348. if(array_key_exists('Alias',$tags)){
  349. tripal_core_load_gff3_alias($feature,$tags['Alias']);
  350. }
  351. // add any aliases for this feature
  352. if(array_key_exists('Dbxref',$tags)){
  353. tripal_core_load_gff3_dbxref($feature,$tags['Dbxref']);
  354. }
  355. // add parent relationships
  356. if(array_key_exists('Parent',$tags)){
  357. tripal_core_load_gff3_parents($feature,$cvterm,$tags['Parent'],$gff_features,$organism_id);
  358. }
  359. }
  360. }
  361. }
  362. tripal_db_set_active($previous_db);
  363. return '';
  364. }
  365. /*************************************************************************
  366. *
  367. */
  368. function tripal_core_load_gff3_parents($feature,$cvterm,$parents,$gff_features,$organism_id){
  369. $uname = $feature->uniquename;
  370. $type = $cvterm->name;
  371. $rel_type = 'part_of';
  372. // create these SQL statements that will be used repeatedly below.
  373. $cvtermsql = "SELECT CVT.cvterm_id
  374. FROM {cvterm} CVT
  375. INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
  376. LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
  377. WHERE cv.name = '%s' and (CVT.name = '%s' or CVTS.synonym = '%s')";
  378. $feature_sql = "SELECT * FROM {feature}
  379. WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
  380. // iterate through the parents in the list
  381. foreach($parents as $parent){
  382. $parent_type = $gff_features[$parent]['type'];
  383. $parentcvterm = db_fetch_object(db_query($cvtermsql,'sequence',$parent_type,$parent_type));
  384. $relcvterm = db_fetch_object(db_query($cvtermsql,'relationship',$rel_type,$rel_type));
  385. $parent_feature = db_fetch_object(db_query($feature_sql,$organism_id,$parent,$parentcvterm->cvterm_id));
  386. if($parent_feature){
  387. // check to see if the relationship already exists
  388. $sql = "SELECT * FROM {feature_relationship} WHERE subject_id = %d and object_id = %d and type_id = %d";
  389. $rel = db_fetch_object(db_query($sql,$feature->feature_id,$parent_feature->feature_id,$relcvterm->cvterm_id));
  390. if($rel){
  391. print " Relationship already exists, skipping '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
  392. } else {
  393. $sql = "INSERT INTO {feature_relationship} (subject_id,object_id,type_id)
  394. VALUES (%d,%d,%d)";
  395. $result = db_query($sql,$feature->feature_id,$parent_feature->feature_id,$relcvterm->cvterm_id);
  396. if(!$result){
  397. print "WARNING: failed to insert feature relationship '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
  398. } else {
  399. print " Inserted relationship relationship: '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
  400. }
  401. }
  402. }
  403. else {
  404. print "WARNING: cannot establish relationship '$uname' ($type) $rel_type '$parent' ($parent_type): Cannot find the parent\n";
  405. }
  406. }
  407. }
  408. /*************************************************************************
  409. *
  410. */
  411. function tripal_core_load_gff3_dbxref($feature,$dbxrefs){
  412. foreach($dbxrefs as $dbxref){
  413. // check to see if this accession reference exists, if not add it
  414. // $dbxrefsql = "SELECT * FROM {dbxref} WHERE db_id = %s and accession = '%s'";
  415. // $dbxref = db_fetch_object(db_query($dbxrefsql,$db_id,$accession));
  416. // if(!$dbxref){
  417. // $sql = "INSERT INTO {dbxref} (db_id,accession) VALUES (%d,'%s')";
  418. // $result = db_query($sql,$db_id,$accession);
  419. // if(!$result){
  420. // print "WARNING: could not add external database acession: '$name accession: $accession'\n";
  421. // }
  422. // $dbxref = db_fetch_object(db_query($dbxrefsql,$db_id,$accession));
  423. // }
  424. }
  425. }
  426. /*************************************************************************
  427. *
  428. */
  429. function tripal_core_load_gff3_alias($feature,$aliases){
  430. // make sure we have a 'synonym_type' vocabulary
  431. $sql = "SELECT * FROM {cv} WHERE name='synonym_type'";
  432. $syncv = db_fetch_object(db_query($sql));
  433. if(!$syncv){
  434. $sql = "INSERT INTO {cv} (name,definition) VALUES ('synonym_type','')";
  435. if(!db_query($sql)){
  436. print("ERROR: Failed to add the synonyms type vocabulary");
  437. return 0;
  438. }
  439. $syncv = db_fetch_object(db_query($sql));
  440. }
  441. // get the 'exact' cvterm, which is the type of synonym we're adding
  442. $cvtsql = "
  443. SELECT * FROM {cvterm} CVT
  444. INNER JOIN {cv} CV ON CVT.cv_id = CV.cv_id
  445. WHERE CVT.name = '%s' and CV.name = '%s'
  446. ";
  447. $syntype = db_fetch_object(db_query($cvtsql,'exact','synonym_type'));
  448. if(!$syntype){
  449. $term = array(
  450. 'name' => array('exact'),
  451. 'id' => array("internal:exact"),
  452. 'definition' => array(''),
  453. 'is_obsolete' => array(0),
  454. );
  455. $syntype = tripal_cv_obo_add_cv_term($term,$syncv,0,1);
  456. if(!$syntype){
  457. tripal_cv_obo_quiterror("Cannot add synonym type: internal:$type");
  458. }
  459. }
  460. // iterate through all of the aliases and add each one
  461. foreach($aliases as $alias){
  462. print " Adding Alias $alias\n";
  463. // check to see if the alias already exists in the synonym table
  464. // if not, then add it
  465. $synsql = "SELECT * FROM {synonym}
  466. WHERE name = '%s' and type_id = %d";
  467. $synonym = db_fetch_object(db_query($synsql,$alias,$syntype->cvterm_id));
  468. if(!$synonym){
  469. $sql = "INSERT INTO {synonym}
  470. (name,type_id,synonym_sgml)
  471. VALUES ('%s',%d,'%s')";
  472. $result = db_query($sql,$alias,$syntype->cvterm_id,'');
  473. if(!$result){
  474. print "ERROR: cannot add alias $alias to synonym table\n";
  475. }
  476. }
  477. $synonym = db_fetch_object(db_query($synsql,$alias,$syntype->cvterm_id));
  478. // check to see if we have a NULL publication in the pub table. If not,
  479. // then add one.
  480. $pubsql = "SELECT * FROM {pub} WHERE uniquename = 'null'";
  481. $pub = db_fetch_object(db_query($pubsql));
  482. if(!$pub){
  483. $sql = "INSERT INTO pub (uniquename,type_id) VALUES ('%s',
  484. (SELECT cvterm_id
  485. FROM cvterm CVT
  486. INNER JOIN dbxref DBX on DBX.dbxref_id = CVT.dbxref_id
  487. INNER JOIN db DB on DB.db_id = DBX.db_id
  488. WHERE CVT.name = 'null' and DB.name = 'null')";
  489. $result = db_query($sql,'null');
  490. if(!$result){
  491. print "ERROR: cannot add null publication needed for setup of alias\n";
  492. return 0;
  493. }
  494. }
  495. $pub = db_fetch_object(db_query($pubsql));
  496. // check to see if the synonym exists in the feature_synonym table
  497. // if not, then add it.
  498. $synsql = "SELECT * FROM {feature_synonym}
  499. WHERE synonym_id = %d and feature_id = %d and pub_id = %d";
  500. $fsyn = db_fetch_object(db_query($synsql,$synonym->synonym_id,$feature->feature_id,$pub->pub_id));
  501. if(!$fsyn){
  502. $sql = "INSERT INTO {feature_synonym}
  503. (synonym_id,feature_id,pub_id)
  504. VALUES (%d,%d,%d)";
  505. $result = db_query($sql,$synonym->synonym_id,$feature->feature_id,$pub->pub_id);
  506. if(!$result){
  507. print "ERROR: cannot add alias $alias to feature synonym table\n";
  508. return 0;
  509. }
  510. }
  511. $fsyn = db_fetch_object(db_query($synsql,$synonym->synonym_id,$feature->feature_id,$pub->pub_id));
  512. }
  513. return 1;
  514. }
  515. /*************************************************************************
  516. *
  517. */
  518. function tripal_core_load_gff3_feature($organism,$cvterm,$uniquename,$name,
  519. $residues,$is_analysis='f',$is_obsolete='f',$add_only,$generate_name) {
  520. if($generate_name){
  521. $uniquename = 'tripal_temp_loading_name' . rand(1,99999999);
  522. }
  523. // check to see if the feature already exists
  524. $feature_sql = "SELECT * FROM {feature}
  525. WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
  526. $feature = db_fetch_object(db_query($feature_sql,$organism->organism_id,$uniquename,$cvterm->cvterm_id));
  527. if(strcmp($is_obsolete,'f')==0){
  528. $is_obsolete = 'false';
  529. }
  530. if(strcmp($is_analysis,'f')==0){
  531. $is_analysis = 'false';
  532. }
  533. // insert the feature if it does not exist otherwise perform an update
  534. if(!$feature){
  535. print "Adding feature '$uniquename' ($cvterm->name)\n";
  536. $isql = "INSERT INTO {feature} (organism_id, name, uniquename, residues, seqlen,
  537. md5checksum, type_id,is_analysis,is_obsolete)
  538. VALUES(%d,'%s','%s','%s',%d, '%s', %d, %s, %s)";
  539. $result = db_query($isql,$organism->organism_id,$name,$uniquename,$residues,strlen($residues),
  540. md5($residues),$cvterm->cvterm_id,$is_analysis,$is_obsolete);
  541. if(!$result){
  542. print "ERROR: failed to insert feature '$uniquename' ($cvterm->name)\n";
  543. return 0;
  544. }
  545. }
  546. elseif(!$add_only) {
  547. print "Updating feature '$uniquename' ($cvterm->name)\n";
  548. $usql = "UPDATE {feature}
  549. SET name = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s',
  550. is_analysis = %s, is_obsolete = %s
  551. WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
  552. $result = db_query($usql,$name,$residues,strlen($residues),md5($residues),$is_analysis,$is_obsolete,
  553. $organism_id,$uniquename,$cvterm->cvterm_id);
  554. if(!$result){
  555. print "ERROR: failed to update feature '$uniquename' ($cvterm->name)\n";
  556. return 0;
  557. }
  558. }
  559. else {
  560. // the feature exists and we don't want to update it so return
  561. // a value of 0. This will stop all downstream property additions
  562. print "Skipping existing feature: '$uniquename' ($cvterm->name).\n";
  563. return 0;
  564. }
  565. $feature = db_fetch_object(db_query($feature_sql,$organism->organism_id,$uniquename,$cvterm->cvterm_id));
  566. // now that we've added the feature let's reset it's uniquename to be
  567. // the feature id
  568. if($generate_name){
  569. $usql = "UPDATE {feature}
  570. SET name = '%s', uniquename = '%s'
  571. WHERE feature_id = %d";
  572. print " Renaming feature '$uniquename' => ";
  573. $uniquename = "$feature->feature_id";
  574. print "$uniquename\n";
  575. $result = db_query($usql,$uniquename,$uniquename,$feature_id);
  576. if(!$result){
  577. print "ERROR: failed to update unnamed feature '$uniquename' with generic name\n";
  578. return 0;
  579. }
  580. $feature->name = $uniquename;
  581. $feature->uniquename = $uniquename;
  582. }
  583. return $feature;
  584. }
  585. /*************************************************************************
  586. *
  587. */
  588. function tripal_core_load_gff3_featureloc($feature,$organism,$landmark,$fmin,
  589. $fmax,$strand,$phase,$is_fmin_partial,$is_fmax_partial,$residue_info,$locgroup)
  590. {
  591. // get the source feature
  592. $sql = "SELECT * FROM {feature}
  593. WHERE organism_id = %d and uniquename = '%s'";
  594. $srcfeature = db_fetch_object(db_query($sql,$organism->organism_id,$landmark));
  595. if(!$srcfeature){
  596. print "ERROR: cannot find landmark feature $landmark. Cannot add the feature location record\n";
  597. return 0;
  598. }
  599. // TODO: create an attribute that recognizes the residue_info,locgroup, is_fmin_partial and is_fmax_partial, right now these are
  600. // hardcoded to be false and 0 below.
  601. // check to see if this featureloc already exists, but also keep track of the
  602. // last rank value
  603. $rank = -1;
  604. $exists = 0;
  605. $featureloc_sql = "SELECT FL.featureloc_id,FL.fmin,FL.fmax, FL.is_fmin_partial,
  606. FL.is_fmax_partial, FL.strand, FL.phase, FL.residue_info,
  607. FL.locgroup, F.uniquename as srcname
  608. FROM {featureloc} FL
  609. INNER JOIN {feature} F on F.feature_id = FL.srcfeature_id
  610. WHERE FL.feature_id = %d
  611. ORDER BY rank ASC";
  612. $recs = db_query($featureloc_sql,$feature->feature_id);
  613. while ($featureloc = db_fetch_object($recs)){
  614. if(strcmp($featureloc->srcname,$landmark)==0 and
  615. $featureloc->fmin == $fmin and strcmp($featureloc->is_fmin_partial,$is_fmin_partial)==0 and
  616. $featureloc->fmax == $fmax and strcmp($featureloc->is_fmax_partial,$is_fmax_partial)==0 and
  617. $featureloc->phase == $phase and $featureloc->strand == $strand and
  618. strcmp($featureloc->residue_info,$residue_info)==0 and
  619. $featureloc->locgroup == $locgroup){
  620. // this is the same featureloc, so do nothing... no need to update
  621. //TODO: need more checks here
  622. print " No change to featureloc\n";
  623. $exists = 1;
  624. }
  625. $rank = $featureloc->rank;
  626. }
  627. if(!$exists){
  628. $rank++;
  629. // this feature location is new so add it
  630. if(!$phase){
  631. $phase = 'NULL';
  632. }
  633. if(strcmp($is_fmin_partial,'f')==0){
  634. $is_fmin_partial = 'false';
  635. }
  636. elseif(strcmp($is_fmin_partial,'t')==0){
  637. $is_fmin_partial = 'true';
  638. }
  639. if(strcmp($is_fmax_partial,'f')==0){
  640. $is_fmax_partial = 'false';
  641. }
  642. elseif(strcmp($is_fmax_partial,'t')==0){
  643. $is_fmax_partial = 'true';
  644. }
  645. print " Adding featureloc $srcfeature->uniquename fmin: $fmin, fmax: $fmax, strand: $strand, phase: $phase, rank: $rank\n";
  646. $fl_isql = "INSERT INTO {featureloc}
  647. (feature_id, srcfeature_id, fmin, is_fmin_partial, fmax, is_fmax_partial,
  648. strand, phase, residue_info, locgroup, rank)
  649. VALUES (%d,%d,%d,%s,%d,%s,%d,%s,'%s',%d,%d)";
  650. $result = db_query($fl_isql,$feature->feature_id,$srcfeature->feature_id,$fmin,$is_fmin_partial,$fmax,$is_fmax_partial,
  651. $strand,$phase,$residue_info,$locgroup,$rank);
  652. if(!$result){
  653. print "ERROR: failed to insert featureloc\n";
  654. exit;
  655. return 0;
  656. }
  657. }
  658. return 1;
  659. }
  660. /*************************************************************************
  661. *
  662. */