<?php
namespace Tests;

use StatonLab\TripalTestSuite\DBTransaction;
use StatonLab\TripalTestSuite\TripalTestCase;

/**
 * Tests for validate_fasta_sequence().
 *
 * Each test method is fed by a data provider that supplies a sequence type
 * ('nucleotide' or 'protein') together with a FASTA-formatted string. Every
 * data set is keyed by a descriptive name so that a failing case is reported
 * by name rather than by numeric index.
 */
class ValidateFastaTest extends TripalTestCase {
  // Uncomment to auto start and rollback db transactions per test method.
  // use DBTransaction;

  /**
   * Test validate_fasta_sequence() detects invalid sequence.
   *
   * @param string $type
   *   Sequence type passed to the validator: 'nucleotide' or 'protein'.
   * @param string $sequence
   *   FASTA text that is expected to be rejected.
   *
   * @dataProvider provideInvalidFasta
   */
  public function testInvalidFasta($type, $sequence) {
    $this->assertFalse(validate_fasta_sequence($type, $sequence),
      "Failed to detect invalid FASTA sequence.");
  }

  /**
   * Test validate_fasta_sequence() works for valid sequence.
   *
   * @param string $type
   *   Sequence type passed to the validator: 'nucleotide' or 'protein'.
   * @param string $sequence
   *   FASTA text that is expected to be accepted.
   *
   * @dataProvider provideValidFasta
   */
  public function testValidFasta($type, $sequence) {
    $this->assertTrue(validate_fasta_sequence($type, $sequence),
      "Unable to pass valid fasta sequence");
  }

  /**
   * Provide invalid fasta entries to test.
   *
   * The FASTA strings intentionally start at column 0: any leading whitespace
   * would become part of the string literal and change the fixture.
   *
   * Declared static because newer PHPUnit releases require static data
   * providers; older releases accept them as well.
   *
   * @return array
   *   Data sets keyed by description; each is [sequence type, FASTA text].
   */
  public static function provideInvalidFasta() {
    return [
      // Second record has a space between '>' and the definition text.
      'nucleotide: space before definition' => ['nucleotide',
'>good first record
ATCGACTAGCTACGATCGACTAGCAGTCAGTACTGACGTACTACGATCGACTAGCATGCT
GCATCGATCGATCGACTATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGACT
> space before definition
ATTTGGGTGGTTGGGGTTCCCCCACACCGTGGGGTTTGGCAAAACCCGTGGGGCCACACA
CACACCCCTGGGGTGTGACCCGTAACGCGAATATGCGCGATATTACGGCGCGCGATATTA'
      ],
      'nucleotide: incorrect characters' => ['nucleotide',
'>Has incorrect characters
ACGTGCATGCATCGACACTACGACTACGACTACTAGCTCGACTGATCGACATCGATCGAT
ACGTCAGCTACGGCGCGGCATGATCGAZZZZZZZZZZZZZZZZZZTCGCAACCCCTTTCA'
      ],
      'nucleotide: special characters' => ['nucleotide',
'>Has special characters
AACGTACGATCGACTAGCTACGATCGATCGACTAGCTAGCATCGATCGATCGATCGATCG
AATTGTCAACGT%%#^&^&%%$$#@@!#%^&*()@ACACGTAGCTAGGCCAACGTGCAAA'
      ],
      'nucleotide: numbers' => ['nucleotide',
'>Has numbers
ACGTACGATCGATCGACTAGCTAGCATCGATCGATCGATCAGCATCGATCGATCGACTCG
ACGTACGATCGATCGA2425345345ACGATCGACTAGCTAGCATCGATCGATCAGTCAG'
      ],
      // Second record has a space between '>' and the identifier.
      'protein: space before identifier' => ['protein',
'>good first record
ACGTURYKMSWBDACGTURYKMSWBDHACGTURYKMSWBDHVNXACGTURYKMSWBDHVNXVNXHVNX
ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXX
> space before identifier
ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXX
ACGTURYKMSWBDHVNACGTACGTURYKMACGTURYKMSWBDHVNXSWBDHVNXURYKMSWBDHVNXX'
      ],
      'protein: incorrect characters' => ['protein',
'>Has incorrect characters
ACGTURYKMSWBDACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXHVNX
ACGTURYKMSWBDHVACGTURYKMSWBZZZZZZZZZZZZZZZZZZDHVNACGTURYKMSWBDHVNXNZ'
      ],
      'protein: special characters' => ['protein',
'>Has special characters
ACGTURYKMSACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXWBDHVNX
ACGTURYKMSWBACGTURYKMSWB%#$%#$%#$^$%^%&^%&*&(*)()(!!#@#$#^DHVNXDHVNX'
      ],
      'protein: numbers' => ['protein',
'>Has numbers
ACGTURYKMSWBDHVACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXNX
ACGTURYKMSWACGTURYKMSWB13212324ACGTURYKMSWBDHVNX324443556DHVNXBDHVNX'
      ],
    ];
  }

  /**
   * Provide Valid FASTA entries to test.
   *
   * The FASTA strings intentionally start at column 0: any leading whitespace
   * would become part of the string literal and change the fixture.
   *
   * Declared static because newer PHPUnit releases require static data
   * providers; older releases accept them as well.
   *
   * @return array
   *   Data sets keyed by description; each is [sequence type, FASTA text].
   */
  public static function provideValidFasta() {
    return [
      'nucleotide: identifier with description' => ['nucleotide',
'>identifier description
ACGACTGTACGCGAGCTACGTACGTAGCATGCATCGATCGATCGATCGATCGATCGATC
AGCTAGTCAGCATCGATGCATGCATCGACGTAATCGAGCGTAGCGAGCTAGTCATACGT'
      ],
      'nucleotide: identifier only' => ['nucleotide',
'>identifier
ACGTACGATCGATCGATCGATCGATCGATCGACATGCTACGATCGATCGATCGATCGG'
      ],
      'nucleotide: multiple records' => ['nucleotide',
'>mutlifasta description
AGCTAGCATCGATCGATCAGCTAGCATCGATCGATCGACTAGCTAGCATCGATCGATC
CGATCGATCAGCTAGCTACGATCGATCGATCGATCGACTAGCTACGATCGATCGATCG
CGATCGACTAGCGTACGATCGATCGATCGATCGATCGATCGATCACGATCAGCTACGT
>multifasta description
GCTACGATCGATCAGCTATCGACTATCGACGTATCGATGGAGTCATGCAGTCATGCAG
ACGCTACGATCAGCGTACGATCGATCGATCGATCGATCGATCGATCGATCGGTGCGTC'
      ],
      'nucleotide: spaces in sequence' => ['nucleotide',
'>with spaces in sequence
ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA
ACAGCATCAGCTAG ACGATCAGCTAGCTACGATCGATCG  ACTGATCAGCATGCAT
AACGTACGATCAGCTAGCATGCAT   ATCGATCAGCTAGCTACGATCGATCGATCAG'
      ],
      // Mixed upper/lower case residues.
      'nucleotide: masked sequence' => ['nucleotide',
'>masked sequence
acgatcgatcgactagctacgatcgacatcgatcatgAACGTGTGGGGTGTGTGCAa
cagctagctagcatcgatcgatcgatcagctagcatgctacgagtcagcatcgtgca'
      ],
      'protein: identifier with description' => ['protein',
'>identier description
ACGTURYKMSWBDHACGTURYKACGTURYKMSWBACGTURYKNXMSWBDHVNXVNX
YKMSWACGTURYKMSWBDHVNXBDHVNXACGTURYACGTURYKMSWBDSWBDHATK'
      ],
      'protein: identifier only' => ['protein',
'>identifier
ACGTURYKMSWBACGTURYKMSWBDHVNXDACGTURYKMSWBDHVNXHVACGTURY
ACGTURYKMSWBDHVNACGTURYKMSWBDACGTURYKMSWBDHVNXHVACGTURXX'
      ],
      'protein: multiple records' => ['protein',
'>multifasta description
ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHACGTURYKMSWBDHVNXVNXXX
ACGTURYKMSWBDACGTURYKMSWBACGTURYKMSWBDHACGTURYKMSWBDHVNXVNXDHVNXHVNX
>multifasta description
ACGTURYKMSWBACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXDHVNX
ACGTURYKMSWBDHVNACGTURYKMSACGTURYKMSWBDACGTURYKMSWBDHVNXHVNXWBDHVNXX'
      ],
      // Includes a sequence line that begins with a space.
      'protein: spaces in sequence' => ['protein',
'>with spaces in sequence
ACGTURYKMSWBDACGTURY KMSWBDHACGTURYKMSWBDH VACGTURYKMSW   BDHVNXNXVNXHVNX
 ACACGTURYKMSWBDHVGTURY  K MSWBDHVNX ACGTURYKMSWBDH  VACGTURYKMSWBDHVNXNX'
      ],
      // Mixed upper/lower case residues.
      'protein: masked sequence' => ['protein',
'>masked sequence
acgturykmswbdhvacgturykmswbdhvacgturykmswbdhvnacgturykmswbdhAXNvnxxnxnx
acgturykmswbacgturykmswbdhvnxacgtuTTUrykmswbdhvnxdacgturykmswbdhvnxhvnx'
      ],
    ];
  }
}