ChadoSchema.inc 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652
  1. <?php
  2. /**
  3. * Provides an application programming interface (API) for describing Chado
  4. * tables.
  5. *
  6. * If you need the Drupal-style array definition for any table, use the
  7. * following:
  8. *
  9. * @code
  10. *
  11. * $chado_schema = new \ChadoSchema();
  12. * $table_schema = $chado_schema->getTableSchema($table_name);
  13. * @endcode
  14. *
  15. * where the variable $table contains the name of the table you want to
  16. * retireve. The getTableSchema method determines the appropriate version of
  17. * Chado and uses the Drupal hook infrastructure to call the appropriate
  18. * hook function to retrieve the table schema.
  19. *
  20. * Additionally, here are some other examples of how to use this class:
  21. * @code
  22. *
  23. * // Retrieve the schema array for the organism table in chado 1.2
  24. * $chado_schema = new \ChadoSchema('1.2');
  25. * $table_schema = $chado_schema->getTableSchema('organism');
  26. *
  27. * // Retrieve all chado tables.
  28. * $chado_schema = new \ChadoSchema();
  29. * $tables = $chado_schema->getTableNames();
  30. * $base_tables = $chado_schema->getbaseTables();
  31. *
  32. * // Check the feature.type_id foreign key constraint
  33. * $chado_schema = new \ChadoSchema();
  34. * $exists = $chado_schema ->checkFKConstraintExists('feature','type_id');
  35. *
  36. * // Check Sequence exists
  37. * $chado_schema = new \ChadoSchema();
  38. * $exists = $chado_schema->checkSequenceExists('organism','organism_id');
  39. * // Or just check the primary key directly
  40. * $compliant = $chado_schema->checkPrimaryKey('organism');
  41. * @endcode
  42. */
  43. class ChadoSchema {
  44. /**
  45. * @var string
  46. * The current version for this site. E.g. "1.3".
  47. */
  48. protected $version = '';
  49. /**
  50. * @var string
  51. * The name of the schema chado was installed in.
  52. */
  53. protected $schema_name = 'chado';
  54. /**
  55. * The ChadoSchema constructor.
  56. *
  57. * @param string $version
  58. * The current version for this site. E.g. "1.3". If a version is not
  59. * provided, the version of the current database will be looked up.
  60. */
  61. public function __construct($version = NULL, $schema_name = NULL) {
  62. // Set the version of the schema.
  63. if ($version === NULL) {
  64. $this->version = chado_get_version(TRUE);
  65. }
  66. else {
  67. $this->version = $version;
  68. }
  69. // Set the name of the schema.
  70. if ($schema_name === NULL) {
  71. $this->schema_name = chado_get_schema_name('chado');
  72. }
  73. else {
  74. $this->schema_name = $schema_name;
  75. }
  76. // Check functions require the chado schema be local and installed...
  77. // So lets check that now...
  78. if (!chado_is_local()) {
  79. tripal_report_error(
  80. 'ChadoSchema',
  81. TRIPAL_NOTICE,
  82. 'The ChadoSchema class requires chado be installed within the drupal database
  83. in a separate schema for any compliance checking functionality.'
  84. );
  85. }
  86. if (!chado_is_installed()) {
  87. tripal_report_error(
  88. 'ChadoSchema',
  89. TRIPAL_NOTICE,
  90. 'The ChadoSchema class requires chado be installed
  91. for any compliance checking functionality.'
  92. );
  93. }
  94. }
  95. /**
  96. * Returns the version number of the Chado this object references.
  97. *
  98. * @returns
  99. * The version of Chado
  100. */
  101. public function getVersion() {
  102. return $this->version;
  103. }
  104. /**
  105. * Retrieve the name of the PostgreSQL schema housing Chado.
  106. *
  107. * @return
  108. * The name of the schema.
  109. */
  110. public function getSchemaName() {
  111. return $this->schema_name;
  112. }
  113. /**
  114. * Retrieves the list of tables in the Chado schema. By default it only
  115. * returns the default Chado tables, but can return custom tables added to
  116. * the Chado schema if requested.
  117. *
  118. * @param $include_custom
  119. * Optional. Set as TRUE to include any custom tables created in the
  120. * Chado schema. Custom tables are added to Chado using the
  121. * tripal_chado_chado_create_table() function.
  122. *
  123. * @returns
  124. * An associative array where the key and value pairs are the Chado table
  125. * names.
  126. */
  127. public function getTableNames($include_custom = FALSE) {
  128. $tables = [];
  129. if ($this->version == '1.3') {
  130. $tables_v1_3 = tripal_chado_chado_get_v1_3_tables();
  131. foreach ($tables_v1_3 as $table) {
  132. $tables[$table] = $table;
  133. }
  134. }
  135. if ($this->version == '1.2') {
  136. $tables_v1_2 = tripal_chado_chado_get_v1_2_tables();
  137. foreach ($tables_v1_2 as $table) {
  138. $tables[$table] = $table;
  139. }
  140. }
  141. if ($this->version == '1.11' or $this->version == '1.11 or older') {
  142. $tables_v1_11 = tripal_chado_chado_get_v1_11_tables();
  143. foreach ($tables_v1_11 as $table) {
  144. $tables[$table] = $table;
  145. }
  146. }
  147. // now add in the custom tables too if requested
  148. if ($include_custom) {
  149. $sql = "SELECT table FROM {tripal_custom_tables}";
  150. $resource = db_query($sql);
  151. foreach ($resource as $r) {
  152. $tables[$r->table] = $r->table;
  153. }
  154. }
  155. asort($tables);
  156. return $tables;
  157. }
  158. /**
  159. * Retrieves the chado tables Schema API array.
  160. *
  161. * @param $table
  162. * The name of the table to retrieve. The function will use the appopriate
  163. * Tripal chado schema API hooks (e.g. v1.11 or v1.2).
  164. *
  165. * @returns
  166. * A Drupal Schema API array defining the table.
  167. */
  168. public function getTableSchema($table) {
  169. // first get the chado version.
  170. $v = $this->version;
  171. // get the table array from the proper chado schema
  172. $v = preg_replace("/\./", "_", $v); // reformat version for hook name
  173. // Call the module_invoke_all.
  174. $hook_name = "chado_schema_v" . $v . "_" . $table;
  175. $table_arr = module_invoke_all($hook_name);
  176. // If the module_invoke_all returned nothing then let's make sure there isn't
  177. // An API call we can call directly. The only time this occurs is
  178. // during an upgrade of a major Drupal version and tripal_core is disabled.
  179. if ((!$table_arr or !is_array($table_arr)) and
  180. function_exists('tripal_chado_' . $hook_name)) {
  181. $api_hook = "tripal_chado_" . $hook_name;
  182. $table_arr = $api_hook();
  183. }
  184. // if the table_arr is empty then maybe this is a custom table
  185. if (!is_array($table_arr) or count($table_arr) == 0) {
  186. $table_arr = $this->getCustomTableSchema($table);
  187. }
  188. return $table_arr;
  189. }
  190. /**
  191. * Retrieves the schema array for the specified custom table.
  192. *
  193. * @param $table
  194. * The name of the table to create.
  195. *
  196. * @return
  197. * A Drupal-style Schema API array definition of the table. Returns
  198. * FALSE on failure.
  199. */
  200. public function getCustomTableSchema($table) {
  201. $sql = "SELECT schema FROM {tripal_custom_tables} WHERE table_name = :table_name";
  202. $results = db_query($sql, [':table_name' => $table]);
  203. $custom = $results->fetchObject();
  204. if (!$custom) {
  205. return FALSE;
  206. }
  207. else {
  208. return unserialize($custom->schema);
  209. }
  210. }
  211. /**
  212. * Returns all chado base tables.
  213. *
  214. * Base tables are those that contain the primary record for a data type.
  215. * For
  216. * example, feature, organism, stock, are all base tables. Other tables
  217. * include linker tables (which link two or more base tables), property
  218. * tables, and relationship tables. These provide additional information
  219. * about primary data records and are therefore not base tables. This
  220. * function retreives only the list of tables that are considered 'base'
  221. * tables.
  222. *
  223. * @return
  224. * An array of base table names.
  225. *
  226. * @ingroup tripal_chado_schema_api
  227. */
  228. function getBaseTables() {
  229. // Initialize the base tables with those tables that are missing a type.
  230. // Ideally they should have a type, but that's for a future version of Chado.
  231. $base_tables = [
  232. 'organism',
  233. 'project',
  234. 'analysis',
  235. 'biomaterial',
  236. 'eimage',
  237. 'assay',
  238. ];
  239. // We'll use the cvterm table to guide which tables are base tables. Typically
  240. // base tables (with a few exceptions) all have a type. Iterate through the
  241. // referring tables.
  242. $schema = $this->getTableSchema('cvterm');
  243. $referring = $schema['referring_tables'];
  244. foreach ($referring as $tablename) {
  245. // Ignore the cvterm tables, relationships, chadoprop tables.
  246. if ($tablename == 'cvterm_dbxref' || $tablename == 'cvterm_relationship' ||
  247. $tablename == 'cvtermpath' || $tablename == 'cvtermprop' || $tablename == 'chadoprop' ||
  248. $tablename == 'cvtermsynonym' || preg_match('/_relationship$/', $tablename) ||
  249. preg_match('/_cvterm$/', $tablename) ||
  250. // Ignore prop tables
  251. preg_match('/prop$/', $tablename) || preg_match('/prop_.+$/', $tablename) ||
  252. // Ignore nd_tables
  253. preg_match('/^nd_/', $tablename)) {
  254. continue;
  255. }
  256. else {
  257. array_push($base_tables, $tablename);
  258. }
  259. }
  260. // Remove any linker tables that have snuck in. Linker tables are those
  261. // whose foreign key constraints link to two or more base table.
  262. $final_list = [];
  263. foreach ($base_tables as $i => $tablename) {
  264. // A few tables break our rule and seems to look
  265. // like a linking table, but we want to keep it as a base table.
  266. if ($tablename == 'biomaterial' or $tablename == 'assay' or $tablename == 'arraydesign') {
  267. $final_list[] = $tablename;
  268. continue;
  269. }
  270. // Remove the phenotype table. It really shouldn't be a base table as
  271. // it is meant to store individual phenotype measurements.
  272. if ($tablename == 'phenotype') {
  273. continue;
  274. }
  275. $num_links = 0;
  276. $schema = $this->getTableSchema($tablename);
  277. $fkeys = $schema['foreign keys'];
  278. foreach ($fkeys as $fkid => $details) {
  279. $fktable = $details['table'];
  280. if (in_array($fktable, $base_tables)) {
  281. $num_links++;
  282. }
  283. }
  284. if ($num_links < 2) {
  285. $final_list[] = $tablename;
  286. }
  287. }
  288. // Now add in the cvterm table to the list.
  289. $final_list[] = 'cvterm';
  290. // Sort the tables and return the list.
  291. sort($final_list);
  292. return $final_list;
  293. }
  294. /**
  295. * Get information about which Chado base table a cvterm is mapped to.
  296. *
  297. * Vocbulary terms that represent content types in Tripal must be mapped to
  298. * Chado tables. A cvterm can only be mapped to one base table in Chado.
  299. * This function will return an object that contains the chado table and
  300. * foreign key field to which the cvterm is mapped. The 'chado_table'
  301. * property of the returned object contains the name of the table, and the
  302. * 'chado_field' property contains the name of the foreign key field (e.g.
  303. * type_id), and the
  304. * 'cvterm' property contains a cvterm object.
  305. *
  306. * @params
  307. * An associative array that contains the following keys:
  308. * - cvterm_id: the cvterm ID value for the term.
  309. * - vocabulary: the short name for the vocabulary (e.g. SO, GO, PATO)
  310. * - accession: the accession for the term.
  311. * - bundle_id: the ID for the bundle to which a term is associated.
  312. * The 'vocabulary' and 'accession' must be used together, the 'cvterm_id'
  313. * can be used on it's own.
  314. *
  315. * @return
  316. * An object containing the chado_table and chado_field properties or NULL
  317. * if if no mapping was found for the term.
  318. */
  319. public function getCvtermMapping($params) {
  320. return chado_get_cvterm_mapping($params);
  321. }
  322. /**
  323. * Check that any given Chado table exists.
  324. *
  325. * This function is necessary because Drupal's db_table_exists() function will
  326. * not look in any other schema but the one where Drupal is installed
  327. *
  328. * @param $table
  329. * The name of the chado table whose existence should be checked.
  330. *
  331. * @return
  332. * TRUE if the table exists in the chado schema and FALSE if it does not.
  333. */
  334. public function checkTableExists($table) {
  335. return chado_table_exists($table);
  336. }
  337. /**
  338. * Check that any given column in a Chado table exists.
  339. *
  340. * This function is necessary because Drupal's db_field_exists() will not
  341. * look in any other schema but the one were Drupal is installed
  342. *
  343. * @param $table
  344. * The name of the chado table.
  345. * @param $column
  346. * The name of the column in the chado table.
  347. *
  348. * @return
  349. * TRUE if the column exists for the table in the chado schema and
  350. * FALSE if it does not.
  351. *
  352. * @ingroup tripal_chado_schema_api
  353. */
  354. public function checkColumnExists($table, $column) {
  355. return chado_column_exists($table, $column);
  356. }
  357. /**
  358. * Check that any given column in a Chado table exists.
  359. *
  360. * This function is necessary because Drupal's db_field_exists() will not
  361. * look in any other schema but the one were Drupal is installed
  362. *
  363. * @param $table
  364. * The name of the chado table.
  365. * @param $column
  366. * The name of the column in the chado table.
  367. * @param $type
  368. * (OPTIONAL) The PostgreSQL type to check for. If not supplied it will be
  369. * looked up via the schema (PREFERRED).
  370. *
  371. * @return
  372. * TRUE if the column type matches what we expect and
  373. * FALSE if it does not.
  374. *
  375. * @ingroup tripal_chado_schema_api
  376. */
  377. public function checkColumnType($table, $column, $expected_type = NULL) {
  378. // Ensure this column exists before moving forward.
  379. if (!$this->checkColumnExists($table, $column)) {
  380. tripal_report_error(
  381. 'ChadoSchema',
  382. TRIPAL_WARNING,
  383. 'Unable to check the type of !table!column since it doesn\'t appear to exist in your site database.',
  384. ['!column' => $column, '!table' => $table]
  385. );
  386. return FALSE;
  387. }
  388. // Look up the type using the Schema array.
  389. if ($expected_type === NULL) {
  390. $schema = $this->getTableSchema($table, $column);
  391. if (is_array($schema) AND isset($schema['fields'][$column])) {
  392. $expected_type = $schema['fields'][$column]['type'];
  393. }
  394. else {
  395. tripal_report_error(
  396. 'ChadoSchema',
  397. TRIPAL_WARNING,
  398. 'Unable to check the type of !table!column due to being unable to find the schema definition.',
  399. ['!column' => $column, '!table' => $table]
  400. );
  401. return FALSE;
  402. }
  403. }
  404. // There is some flexibility in the expected type...
  405. // Fix that here.
  406. switch ($expected_type) {
  407. case 'int':
  408. $expected_type = 'integer';
  409. break;
  410. case 'serial':
  411. $expected_type = 'integer';
  412. break;
  413. case 'varchar':
  414. $expected_type = 'character varying';
  415. break;
  416. case 'datetime':
  417. $expected_type = 'timestamp without time zone';
  418. break;
  419. case 'char':
  420. $expected_type = 'character';
  421. break;
  422. }
  423. // Grab the type from the current database.
  424. $query = 'SELECT data_type
  425. FROM information_schema.columns
  426. WHERE
  427. table_name = :table AND
  428. column_name = :column AND
  429. table_schema = :schema
  430. ORDER BY ordinal_position
  431. LIMIT 1';
  432. $type = db_query($query,
  433. [
  434. ':table' => $table,
  435. ':column' => $column,
  436. ':schema' => $this->schema_name,
  437. ])->fetchField();
  438. // Finally we do the check!
  439. if ($type === $expected_type) {
  440. return TRUE;
  441. }
  442. elseif (($expected_type == 'float') AND (($type == 'double precision') OR ($type == 'real'))) {
  443. return TRUE;
  444. }
  445. elseif ($type == 'smallint' AND $expected_type == 'integer') {
  446. return TRUE;
  447. }
  448. elseif ($type == 'bigint' AND $expected_type == 'integer') {
  449. return TRUE;
  450. }
  451. else {
  452. return FALSE;
  453. }
  454. }
  455. /**
  456. * Check that any given sequence in a Chado table exists.
  457. *
  458. * @param table
  459. * The name of the table the sequence is used in.
  460. * @param column
  461. * The name of the column the sequence is used to populate.
  462. *
  463. * @return
  464. * TRUE if the seqeuence exists in the chado schema and FALSE if it does
  465. * not.
  466. *
  467. * @ingroup tripal_chado_schema_api
  468. */
  469. public function checkSequenceExists($table, $column) {
  470. $prefixed_table = $this->schema_name . '.' . $table;
  471. $sequence_name = db_query('SELECT pg_get_serial_sequence(:table, :column);',
  472. [':table' => $prefixed_table, ':column' => $column])->fetchField();
  473. // Remove prefixed table from sequence name
  474. $sequence_name = str_replace($this->schema_name . '.', '', $sequence_name);
  475. return chado_sequence_exists($sequence_name);
  476. }
  477. /**
  478. * Check that the primary key exists, has a sequence and a constraint.
  479. *
  480. * @param $table
  481. * The table you want to check the primary key for.
  482. * @param $column
  483. * (OPTIONAL) The name of the primary key column.
  484. *
  485. * @return
  486. * TRUE if the primary key meets all the requirements and false otherwise.
  487. */
  488. public function checkPrimaryKey($table, $column = NULL) {
  489. // If they didn't supply the column, then we can look it up.
  490. if ($column === NULL) {
  491. $table_schema = $this->getTableSchema($table);
  492. $column = $table_schema['primary key'][0];
  493. }
  494. // If there is no primary key then we can't check it.
  495. // It neither passes nore fails validation.
  496. if (empty($column)) {
  497. tripal_report_error(
  498. 'ChadoSchema',
  499. TRIPAL_NOTICE,
  500. 'Cannot check the validity of the primary key for "!table" since there is no record of one.',
  501. ['!table' => $table]
  502. );
  503. return NULL;
  504. }
  505. // Check the column exists.
  506. $column_exists = $this->checkColumnExists($table, $column);
  507. if (!$column_exists) {
  508. return FALSE;
  509. }
  510. // First check that the sequence exists.
  511. $sequence_exists = $this->checkSequenceExists($table, $column);
  512. if (!$sequence_exists) {
  513. return FALSE;
  514. }
  515. // Next check the constraint is there.
  516. $constraint_exists = chado_query(
  517. "SELECT 1
  518. FROM information_schema.table_constraints
  519. WHERE table_name=:table AND constraint_type = 'PRIMARY KEY'",
  520. [':table' => $table])->fetchField();
  521. if (!$constraint_exists) {
  522. return FALSE;
  523. }
  524. return TRUE;
  525. }
  526. /**
  527. * Check that the constraint exists.
  528. *
  529. * @param $table
  530. * The table the constraint applies to.
  531. * @param $constraint_name
  532. * The name of the constraint you want to check.
  533. * @param $type
  534. * The type of constraint. Should be one of "PRIMARY KEY", "UNIQUE", or
  535. * "FOREIGN KEY".
  536. *
  537. * @return
  538. * TRUE if the constraint exists and false otherwise.
  539. */
  540. function checkConstraintExists($table, $constraint_name, $type) {
  541. // Next check the constraint is there.
  542. $constraint_exists = chado_query(
  543. "SELECT 1
  544. FROM information_schema.table_constraints
  545. WHERE table_name=:table AND constraint_type = :type AND constraint_name = :name",
  546. [
  547. ':table' => $table,
  548. ':name' => $constraint_name,
  549. ':type' => $type,
  550. ])->fetchField();
  551. if (!$constraint_exists) {
  552. return FALSE;
  553. }
  554. return TRUE;
  555. }
  556. /**
  557. * Check the foreign key constrain specified exists.
  558. *
  559. * @param $base_table
  560. * The name of the table the foreign key resides in. E.g. 'feature' for
  561. * the feature.type_id => cvterm.cvterm_id foreign key.
  562. * @param $base_column
  563. * The name of the column that is a foreign key in. E.g. 'type_id' for
  564. * the feature.type_id => cvterm.cvterm_id foreign key.
  565. *
  566. * @return
  567. * TRUE if the constraint exists and false otherwise.
  568. */
  569. function checkFKConstraintExists($base_table, $base_column) {
  570. // Since we don't have a constraint name, we have to use the known pattern for
  571. // creating these names in order to make this check.
  572. // This is due to PostgreSQL not storing column information for constraints
  573. // in the information_schema tables.
  574. $constraint_name = $base_table . '_' . $base_column . '_fkey';
  575. return $this->checkConstraintExists($base_table, $constraint_name, 'FOREIGN KEY');
  576. }
  577. /**
  578. * A Chado-aware replacement for the db_index_exists() function.
  579. *
  580. * @param $table
  581. * The table to be altered.
  582. * @param $name
  583. * The name of the index.
  584. */
  585. function checkIndexExists($table, $name) {
  586. return chado_index_exists($table, $name);
  587. }
  588. }