tripal_chado.gff_exporter.inc 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. <?php
  2. function tripal_chado_gff3_exporter($source, $filters) {
  3. // Make sure the filters array is setup properly.
  4. if (!array_key_exists('types', $filters) and
  5. !array_key_exists('feature_id', $filters)) {
  6. tripal_report_error("tripal_chado", TRIPAL_ERROR,
  7. "Please provide one or more types to include in the exported GFF file.",
  8. []);
  9. exit;
  10. }
  11. $select = "
  12. SELECT SF.uniquename as landmark_uname, SF.name as landmark_name,
  13. F.feature_id, F.dbxref_id, F.uniquename, F.name, CVT.name as type,
  14. FL.fmin, FL.fmax, FL.strand, FL.phase
  15. ";
  16. $from = "
  17. FROM {featureloc} FL
  18. INNER JOIN {feature} F on FL.feature_id = F.feature_id
  19. INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
  20. INNER JOIN {feature} SF on FL.srcfeature_id = SF.feature_id
  21. ";
  22. $where = "
  23. WHERE 1=1
  24. ";
  25. $order = "
  26. ORDER BY SF.uniquename, FL.fmin
  27. ";
  28. $args = [];
  29. // Filter by organism
  30. if (array_key_exists('genus', $filters) or array_key_exists('species', $filters)) {
  31. $from .= "INNER JOIN {organism} O on F.organism_id = O.organism_id ";
  32. if (array_key_exists('genus', $filters)) {
  33. $where .= "AND O.genus = :genus ";
  34. $args[':genus'] = $filters['genus'];
  35. }
  36. if (array_key_exists('species', $filters)) {
  37. $where .= "AND O.species = :species ";
  38. $args[':species'] = $filters['species'];
  39. }
  40. }
  41. if (array_key_exists('types', $filters)) {
  42. if (is_array($filters['types'])) {
  43. $where .= "AND CVT.name IN (:types) ";
  44. $args[':types'] = $filters['types'];
  45. }
  46. else {
  47. tripal_report_error("tripal_chado", TRIPAL_ERROR,
  48. "The 'types' element of the filters array must be an array.",
  49. []);
  50. exit;
  51. }
  52. }
  53. // Filter by exact feature_id
  54. if (array_key_exists('feature_id', $filters)) {
  55. $where .= "AND F.feature_id = :feature_id ";
  56. $args[':feature_id'] = $filters['feature_id'];
  57. }
  58. // Filter by analysis
  59. if (array_key_exists('analysis_id', $filters) or
  60. array_key_exists('analysis_name', $filters)) {
  61. $from .= "INNER JOIN {analysisfeature} AF on AF.feature_id = F.feature_id ";
  62. if (array_key_exists('analysis_id', $filters)) {
  63. $where .= "AND AF.analysis_id = :analysis_id ";
  64. $args[':analysis_id'] = $filters['analysis_id'];
  65. }
  66. if (array_key_exists('analysis_name', $filters)) {
  67. $from .= "INNER JOIN {analysis} A on AF.analysis_id = A.analysis_id ";
  68. $where .= "AND A.name = :analysis_name ";
  69. $args[':analysis_name'] = $filters['analysis_name'];
  70. }
  71. }
  72. $sql = "$select $from $where $order";
  73. // The SQL statement for feature properties.
  74. $props_sql = "
  75. SELECT CVT.name, FP.value
  76. FROM {featureprop} FP
  77. INNER JOIN {cvterm} CVT on CVT.cvterm_id = FP.type_id
  78. WHERE FP.feature_id = :feature_id
  79. ORDER BY CVT.name
  80. ";
  81. // The SQL statement for Dbxrefs associated with this feature
  82. $dbxref_sql = "
  83. SELECT DB.name as dbname, DBX.accession
  84. FROM {dbxref} DBX
  85. INNER JOIN {db} DB on DB.db_id = DBX.db_id
  86. WHERE DBX.dbxref_id = :dbxref_id
  87. UNION
  88. SELECT DB.name, DBX.accession
  89. FROM {feature_dbxref} FDBX
  90. INNER JOIN {dbxref} DBX on DBX.dbxref_id = FDBX.dbxref_id
  91. INNER JOIN {db} DB on DB.db_id = DBX.db_id
  92. WHERE FDBX.feature_id = :feature_id
  93. ";
  94. // The SQL statement for CVTerms assigned to this feature.
  95. $cvterm_sql = "
  96. SELECT CV.name as db_name, DBX.accession
  97. FROM {feature_cvterm} FCVT
  98. INNER JOIN {cvterm} CVT on CVT.cvterm_id = FCVT.cvterm_id
  99. INNER JOIN {cv} CV on CV.cv_id = CVT.cv_id
  100. INNER JOIN {dbxref} DBX on CVT.dbxref_id = DBX.dbxref_id
  101. WHERE FCVT.feature_id = :feature_id
  102. ";
  103. // The SQL for finding the parents of this feature.
  104. $parent_sql = "
  105. SELECT F.name, F.uniquename, F.feature_id
  106. FROM {feature_relationship} FR
  107. INNER JOIN {cvterm} CVT on CVT.cvterm_id = FR.type_id
  108. INNER JOIN {feature} F on FR.object_id = F.feature_id
  109. WHERE CVT.name = 'part_of' AND FR.subject_id = :feature_id
  110. ";
  111. // The SQL for aliases of this feature.
  112. $alias_sql = "
  113. SELECT S.name
  114. FROM {feature_synonym} FS
  115. INNER JOIN {synonym} S on FS.synonym_id = S.synonym_id
  116. WHERE FS.feature_id = :feature_id
  117. ";
  118. // Get the score
  119. $score_sql = "
  120. SELECT rawscore as score
  121. FROM {analysisfeature} AF
  122. WHERE feature_id = :feature_id
  123. ORDER BY AF.analysis_id
  124. OFFSET 0 LIMIT 1
  125. ";
  126. $results = chado_query($sql, $args);
  127. while ($line = $results->fetchObject()) {
  128. // Get the score for this feature
  129. $score = chado_query($score_sql, [':feature_id' => $line->feature_id])->fetchField();
  130. print $line->landmark_uname . "\t";
  131. print $source . "\t";
  132. print $line->type . "\t";
  133. print ($line->fmin + 1) . "\t";
  134. print $line->fmax . "\t";
  135. if ($score) {
  136. print $score . "\t";
  137. }
  138. else {
  139. print "." . "\t";
  140. }
  141. if ($line->strand) {
  142. print $line->strand . "\t";
  143. }
  144. else {
  145. print '.' . "\t";
  146. }
  147. if ($line->phase) {
  148. print $line->phase . "\t";
  149. }
  150. else {
  151. print '.' . "\t";
  152. }
  153. print "ID=" . $line->uniquename . ";Name=" . $line->name . ";";
  154. // Look for a parent of this feature
  155. $args = [':feature_id' => $line->feature_id];
  156. $parents = chado_query($parent_sql, $args);
  157. $attr = '';
  158. while ($parent = $parents->fetchObject()) {
  159. $attr .= $parent->uniquename . ",";
  160. }
  161. if ($attr) {
  162. print "Parent=" . substr($attr, 0, -1) . ";";
  163. }
  164. // Add in any aliases
  165. $args = [':feature_id' => $line->feature_id];
  166. $aliases = chado_query($alias_sql, $args);
  167. $attr = '';
  168. while ($alias = $aliases->fetchObject()) {
  169. $attr .= $alias->name . ",";
  170. }
  171. if ($attr) {
  172. print "Alias=" . substr($attr, 0, -1) . ";";
  173. }
  174. $props = chado_query($props_sql, [':feature_id' => $line->feature_id]);
  175. $prop_name = '';
  176. while ($prop = $props->fetchobject()) {
  177. // If this is the first time we've encountered this property then
  178. // add the name=value key pair.
  179. if ($prop_name != $prop->name) {
  180. if ($prop_name) {
  181. // End the previous property
  182. print ";";
  183. }
  184. // TODO: urlencode the properties
  185. print $prop->name . "=" . $prop->value;
  186. $prop_name = $prop->name;
  187. }
  188. // If we've seen this property before then just add the value.
  189. else {
  190. print "," . $prop->value;
  191. }
  192. }
  193. // End the last property
  194. if ($prop_name) {
  195. print ";";
  196. }
  197. // Add in any DBXref records
  198. $args = [
  199. ':feature_id' => $line->feature_id,
  200. ':dbxref_id' => $line->dbxref_id,
  201. ];
  202. $dbxrefs = chado_query($dbxref_sql, $args);
  203. $xrefs = '';
  204. while ($dbxref = $dbxrefs->fetchObject()) {
  205. if ($dbxref->dbname = 'GFF_source') {
  206. continue;
  207. }
  208. $xrefs .= $dbxref->dbname . ":" . $dbxref->accession . ",";
  209. }
  210. if ($xrefs) {
  211. print "Dbxref=" . substr($xrefs, 0, -1) . ";";
  212. }
  213. // Add in any CVTerm records
  214. $args = [':feature_id' => $line->feature_id];
  215. $cvterms = chado_query($cvterm_sql, $args);
  216. $xrefs = '';
  217. while ($cvterm = $cvterms->fetchObject()) {
  218. $xrefs .= $cvterm->db_name . ":" . $cvterm->accession . ",";
  219. }
  220. if ($xrefs) {
  221. print "Ontology_term=" . substr($xrefs, 0, -1) . ";";
  222. }
  223. print "\n";
  224. // Look for children of this feature and recursively add them.
  225. $children_sql = "
  226. SELECT FR.subject_id
  227. FROM {feature_relationship} FR
  228. INNER JOIN {cvterm} CVT on CVT.cvterm_id = FR.type_id
  229. WHERE CVT.name = 'part_of' AND FR.object_id = :feature_id
  230. ";
  231. $children = chado_query($children_sql, [':feature_id' => $line->feature_id]);
  232. while ($child = $children->fetchObject()) {
  233. $child_filters = [
  234. 'feature_id' => $child->subject_id,
  235. ];
  236. tripal_chado_gff3_exporter($source, $child_filters);
  237. }
  238. }
  239. }