|
@@ -92,6 +92,31 @@ function tripal_feature_gff3_load_form() {
|
|
|
you may specify the name of the attribute to use for the name."),
|
|
|
);
|
|
|
|
|
|
+ // Advanced Options
|
|
|
+ $form['advanced'] = array('#type' => 'fieldset','#title' => t('Advanced Options'),
|
|
|
+ '#collapsible' => TRUE,'#collapsed' => TRUE
|
|
|
+ );
|
|
|
+ $form['advanced']['re_help'] = array('#type' => 'item',
|
|
|
+ '#value' => t('A regular expression is an advanced method for extracting information from a string of text.
|
|
|
+ If your GFF3 file does not contain polypeptide (or protein) features, but contains CDS features, proteins will be automatically created.
|
|
|
+ By default the loader will give each protein a name based on the name of the corresponding mRNA followed by the "-protein" suffix.
|
|
|
+ If you want to customize the name of the created protein, you can use the following regex.')
|
|
|
+ );
|
|
|
+ $form['advanced']['re_mrna'] = array('#type' => 'textfield',
|
|
|
+ '#title' => t('Regular expression for the mRNA name'),'#required' => FALSE,
|
|
|
+ '#description' => t('Enter the regular expression that will extract portions of
|
|
|
+ the mRNA unique name. For example, for a
|
|
|
+ mRNA with a unique name finishing by -RX (e.g. SPECIES0000001-RA),
|
|
|
+ the regular expression would be, "^(.*?)-R([A-Z]+)$".')
|
|
|
+ );
|
|
|
+ $form['advanced']['re_protein'] = array('#type' => 'textfield',
|
|
|
+ '#title' => t('Replacement string for the protein name'),'#required' => FALSE,
|
|
|
+ '#description' => t('Enter the replacement string that will be used to create
|
|
|
+ the protein name based on the mRNA regular expression. For example, for a
|
|
|
+ mRNA regular expression "^(.*?)-R([A-Z]+)$", the corresponding protein regular
|
|
|
+ expression would be "$1-P$2".')
|
|
|
+ );
|
|
|
+
|
|
|
$form['import_options'] = array(
|
|
|
'#type' => 'fieldset',
|
|
|
'#title' => t('Import Options'),
|
|
@@ -230,6 +255,8 @@ function tripal_feature_gff3_load_form_validate($form, &$form_state) {
|
|
|
$line_number = trim($form_state['values']['line_number']);
|
|
|
$landmark_type = trim($form_state['values']['landmark_type']);
|
|
|
$alt_id_attr = trim($form_state['values']['alt_id_attr']);
|
|
|
+ $re_mrna = trim($form_state['values']['re_mrna']);
|
|
|
+ $re_protein = trim($form_state['values']['re_protein']);
|
|
|
|
|
|
|
|
|
|
|
@@ -256,6 +283,21 @@ function tripal_feature_gff3_load_form_validate($form, &$form_state) {
|
|
|
if ($line_number and !is_numeric($line_number) or $line_number < 0) {
|
|
|
form_set_error('line_number', t("Please provide an integer line number greater than zero."));
|
|
|
}
|
|
|
+
|
|
|
+ if (!($re_mrna and $re_protein) and ($re_mrna or $re_protein)) {
|
|
|
+ form_set_error('re_uname', t("You must provide both a regular expression for mRNA and a replacement string for protein"));
|
|
|
+ }
|
|
|
+
|
|
|
+ // Check that the mRNA regular expression and the protein replacement string are valid.
|
|
|
+ set_error_handler(function() {}, E_WARNING);
|
|
|
+ $result_re = preg_match("/" . $re_mrna . "/", null);
|
|
|
+ $result = preg_replace("/" . $re_mrna . "/", $re_protein, null);
|
|
|
+ restore_error_handler();
|
|
|
+ if ($result_re === FALSE) {
|
|
|
+ form_set_error('re_mrna', 'Invalid regular expression.');
|
|
|
+ } else if ($result === FALSE) {
|
|
|
+ form_set_error('re_protein', 'Invalid replacement string.');
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -281,12 +323,14 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
|
|
|
$landmark_type = trim($form_state['values']['landmark_type']);
|
|
|
$alt_id_attr = trim($form_state['values']['alt_id_attr']);
|
|
|
$create_organism = $form_state['values']['create_organism'];
|
|
|
+ $re_mrna = trim($form_state['values']['re_mrna']);
|
|
|
+ $re_protein = trim($form_state['values']['re_protein']);
|
|
|
|
|
|
|
|
|
$args = array($gff_file, $organism_id, $analysis_id, $add_only,
|
|
|
$update, $refresh, $remove, $use_transaction, $target_organism_id,
|
|
|
$target_type, $create_target, $line_number, $landmark_type, $alt_id_attr,
|
|
|
- $create_organism);
|
|
|
+ $re_mrna, $re_protein, $create_organism);
|
|
|
|
|
|
$type = '';
|
|
|
if ($add_only) {
|
|
@@ -367,6 +411,10 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
|
|
|
* Sometimes lines in the GFF file are missing the required ID attribute that
|
|
|
* specifies the unique name of the feature. If so, you may specify the
|
|
|
* name of an existing attribute to use for the ID.
|
|
|
+ * @param $re_mrna
|
|
|
+ * A regular expression to extract portions from the mRNA id.
|
|
|
+ * @param $re_protein
|
|
|
+ * A replacement string to generate the protein id.
|
|
|
* @param $create_organism
|
|
|
* The Tripal GFF loader supports the "organism" attribute. This allows
|
|
|
* features of a different organism to be aligned to the landmark sequence of
|
|
@@ -383,8 +431,8 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
|
|
|
function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
$add_only = 0, $update = 1, $refresh = 0, $remove = 0, $use_transaction = 1,
|
|
|
$target_organism_id = NULL, $target_type = NULL, $create_target = 0,
|
|
|
- $start_line = 1, $landmark_type = '', $alt_id_attr = '', $create_organism = FALSE,
|
|
|
- $job = NULL) {
|
|
|
+ $start_line = 1, $landmark_type = '', $alt_id_attr = '', $re_mrna = '',
|
|
|
+ $re_protein = '', $create_organism = FALSE, $job = NULL) {
|
|
|
|
|
|
$ret = array();
|
|
|
$date = getdate();
|
|
@@ -925,8 +973,16 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
// protein.
|
|
|
if (!$result->protein_id) {
|
|
|
// Get details about this protein
|
|
|
- $uname = $result->uniquename . '-protein';
|
|
|
- $name = $result->name;
|
|
|
+ if ($re_mrna and $re_protein) {
|
|
|
+ // We use a regex to generate protein name from mRNA name
|
|
|
+ $uname = preg_replace("/$re_mrna/", $re_protein, $result->uniquename);
|
|
|
+ $name = $uname;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ // No regex, use the default '-protein' suffix
|
|
|
+ $uname = $result->uniquename . '-protein';
|
|
|
+ $name = $result->name;
|
|
|
+ }
|
|
|
$values = array(
|
|
|
'parent_id' => $result->feature_id,
|
|
|
'fmin' => $result->fmin
|