update_description.sh 1.8 KB

123456789101112131415161718192021222324252627282930313233
  1. #!/bin/bash
  2. #update_description.sh
  3. DB_USER='root' #'your_db_username'
  4. DB_PASS='root' #'your_password'
  5. DB='my_genie_sys_database' #'database_name'
  6. # if less than two arguments supplied, display error message
  7. if [ $# -le 0 ]
  8. then
  9. start='\033[0;33m'
  10. start_0='\033[0;33m'
  11. start_2='\033[0;31m'
  12. end='\033[0m'
  13. echo "\nUsage:\n$0 ${start}[gene_info/transcript_info] [file_name]${end}\nEx: ${start_2}sh update_descriptions.sh transcript_info/gene_info potra_descriptions.tsv${end}\n\nWhat it does?\n${start_0}This script will create a two columns(ids, descriptions) temporary table and load the [file_name] into it.\nThen it will match ids column in temporary table with transcript_ids/gene_ids and update the gene/transcript descriptions.\nFinally delete the temporary table.\n${end}"
  14. exit 1
  15. fi
  16. table_name=$(echo $1 | awk '{split($0,a,"_");print a[1]}');
  17. tmp_field_name=$table_name"_id"
  18. mysql --host=localhost --user=$DB_USER --password=$DB_PASS --local_infile=1 --database=$DB <<EOFMYSQL
  19. DROP TEMPORARY TABLE IF EXISTS tmp_tb ;
  20. UPDATE $1 SET description = '';
  21. CREATE TEMPORARY TABLE tmp_tb(gene_name VARCHAR(255),annotation VARCHAR(500), PRIMARY KEY(gene_name));
  22. load data local infile '$2' replace INTO TABLE tmp_tb fields terminated by '\t' LINES TERMINATED BY '\n' ignore 0 lines;
  23. #DROP INDEX if exists $tmp_field_name ON $tmp_field_name;
  24. #ALTER TABLE $1 ADD INDEX $tmp_field_name ($tmp_field_name);
  25. UPDATE $1 INNER JOIN tmp_tb on tmp_tb.gene_name = $1.$tmp_field_name SET $1.description = tmp_tb.annotation;
  26. #select * from tmp_tb limit 100;
  27. DROP TEMPORARY TABLE tmp_tb;
  28. EOFMYSQL
  29. #Following line is very important when we load data into the corresponding table. If we did not make index it takes so long time to load.
  30. #ALTER TABLE transcript_info ADD INDEX `transcript_id` (`transcript_id`)