import pytest
from folding_studio.query.soloseq import SoloSeqParameters, SoloSeqQuery
from folding_studio_data_models.exceptions import FastaValidationError

# FASTA text that every successful query below is expected to carry.
_MONOMER_RECORD = ">tag1|tag2\nABCDEGF"

# Keyword arguments forwarded to the constructors; the tests expect the
# "unsupported" entry not to show up in the resulting parameters.
_EXTRA_KWARGS = {"data_random_seed": 42, "skip_relaxation": False, "unsupported": 123}


def test_soloseqquery_from_protein_sequence():
    """Check ``SoloSeqQuery.from_protein_sequence`` on inline FASTA text.

    Covers the default parameters, explicit keyword overrides, and the
    ``FastaValidationError`` expected for input holding two records.
    """
    expected_files = {"tag1": _MONOMER_RECORD}

    default_query = SoloSeqQuery.from_protein_sequence(_MONOMER_RECORD)
    assert default_query.fasta_files == expected_files
    assert default_query.query_name == "tag1"
    assert default_query.parameters == SoloSeqParameters()

    tuned_query = SoloSeqQuery.from_protein_sequence(_MONOMER_RECORD, **_EXTRA_KWARGS)
    assert tuned_query.fasta_files == expected_files
    assert tuned_query.query_name == "tag1"
    assert tuned_query.parameters == SoloSeqParameters(
        data_random_seed=42, skip_relaxation=False
    )

    # Two records in one string must be rejected.
    two_records = ">tag1|tag2\nABCDEGF\n>tag3\nABCDEGF"
    with pytest.raises(FastaValidationError):
        SoloSeqQuery.from_protein_sequence(two_records)


def test_soloseqquery_from_fasta_file(tmp_files):
    """Check ``SoloSeqQuery.from_file`` against the ``tmp_files`` fixture.

    Rejected inputs are exercised first (bad suffix, empty FASTA, multimer
    FASTA), then the monomer file with default and overridden parameters.
    """
    with pytest.raises(ValueError, match="Unsupported suffix"):
        SoloSeqQuery.from_file(tmp_files["invalid_source"])

    # Both of these fixture files are expected to fail FASTA validation.
    with pytest.raises(FastaValidationError):
        SoloSeqQuery.from_file(tmp_files["empty_fasta"])
    with pytest.raises(FastaValidationError):
        SoloSeqQuery.from_file(tmp_files["multimer_fasta"])

    expected_files = {"monomer": _MONOMER_RECORD}

    default_query = SoloSeqQuery.from_file(tmp_files["monomer_fasta"])
    assert default_query.fasta_files == expected_files
    assert default_query.query_name == "monomer"
    assert default_query.parameters == SoloSeqParameters()

    tuned_query = SoloSeqQuery.from_file(tmp_files["monomer_fasta"], **_EXTRA_KWARGS)
    assert tuned_query.fasta_files == expected_files
    assert tuned_query.query_name == "monomer"
    assert tuned_query.parameters == SoloSeqParameters(
        data_random_seed=42, skip_relaxation=False
    )


def test_soloseqquery_from_directory(tmp_files):
    """Check ``SoloSeqQuery.from_directory`` against the ``tmp_files`` fixture.

    An empty directory and a directory containing a multimer are expected to
    raise; the valid directory must yield one entry per monomer file.
    """
    with pytest.raises(ValueError, match="No FASTA files found in directory"):
        SoloSeqQuery.from_directory(tmp_files["empty_dir"])

    with pytest.raises(FastaValidationError):
        SoloSeqQuery.from_directory(tmp_files["dir_with_multimer"])

    expected_files = {
        "monomer_1": _MONOMER_RECORD,
        "monomer_2": _MONOMER_RECORD,
    }

    default_query = SoloSeqQuery.from_directory(tmp_files["valid_dir"])
    assert default_query.fasta_files == expected_files
    assert default_query.query_name == "valid_dir"
    assert default_query.parameters == SoloSeqParameters()

    tuned_query = SoloSeqQuery.from_directory(tmp_files["valid_dir"], **_EXTRA_KWARGS)
    assert tuned_query.fasta_files == expected_files
    assert tuned_query.query_name == "valid_dir"
    assert tuned_query.parameters == SoloSeqParameters(
        data_random_seed=42, skip_relaxation=False
    )