File size: 4,400 Bytes
086ca7b dd2b907 a713c85 086ca7b dd2b907 35eb5e1 dd2b907 35eb5e1 dd2b907 35eb5e1 086ca7b 35eb5e1 086ca7b e3148e0 35eb5e1 086ca7b 35eb5e1 086ca7b 35eb5e1 086ca7b 35eb5e1 e3148e0 35eb5e1 e3148e0 00539a5 e3148e0 00539a5 e3148e0 717cff1 e3148e0 00539a5 568fe9a ca699c2 717cff1 00539a5 e3148e0 717cff1 e3148e0 a713c85 e3148e0 00539a5 e3148e0 717cff1 e3148e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
from typing import List, Dict
import numpy as np
import gradio as gr
import data_utils
def smiles2monomers(smiles: str) -> list[str]:
    """Split a molecule given as SMILES into its monomers (each also SMILES).

    Placeholder: the decomposition is not implemented yet, so the input is
    ignored and an empty list is always returned.

    Args:
        smiles: SMILES string of the molecule (currently unused).

    Returns:
        An empty list.
    """
    monomers: list[str] = []  # nothing is extracted until this is implemented
    return monomers
def generate_monomers(num_monomers: int, monomers_vocab: List[str]) -> list[str]:
    """Pick a random sequence of monomers from the given vocabulary.

    Sampling is done with ``np.random.choice`` using its defaults, i.e.
    uniformly and with replacement, so a monomer may occur more than once.

    Args:
        num_monomers: How many monomers to draw.
        monomers_vocab: Monomer names to draw from.

    Returns:
        A plain Python list with ``num_monomers`` entries taken from
        ``monomers_vocab``.
    """
    # np.random.choice yields a NumPy array of np.str_; convert it so the
    # result really is the ``list[str]`` promised by the signature.
    return np.random.choice(monomers_vocab, num_monomers).tolist()
def monomer2domains(monomer: str, is_start=False, is_final=False) -> list[str]:
    """Return the NRPS module (its list of domains) suggested for one monomer.

    The work is delegated to ``data_utils.module_generator.suggest_module``;
    the position flags are forwarded unchanged.

    Args:
        monomer: Monomer to build a module for.
        is_start: Whether this monomer is the first one in the peptide.
        is_final: Whether this monomer is the last one in the peptide.

    Returns:
        Whatever ``suggest_module`` returns for the monomer.
        NOTE(review): annotated as ``list[str]``, but ``convert_to_fasta``
        reads ``'name'`` and ``'sequence'`` keys from each domain, so the
        elements are presumably dicts - confirm against ``data_utils``.
    """
    suggest = data_utils.module_generator.suggest_module
    return suggest(monomer, is_start=is_start, is_final=is_final)
def monomers2modules(monomer_list: list[str], is_cyclic: bool=False) -> List[List[Dict[str, str]]]:
    """Build one NRPS module (list of domains) for every monomer, in order.

    The first monomer is flagged as the start of the chain and the last one
    as its end; a single-monomer input is flagged as both.

    Args:
        monomer_list: Monomers of the target peptide, in sequence order.
        is_cyclic: Currently ignored, since it is not yet clear how
            cyclicity is encoded in the NRPS domain sequence.

    Returns:
        A list with one entry per monomer, each entry being the result of
        ``monomer2domains`` for that monomer. Empty input gives an empty list.
    """
    # enumerate() indices stop at len - 1, so the final monomer must be
    # detected against that value (comparing with len() never matches).
    last_index = len(monomer_list) - 1
    return [
        monomer2domains(
            monomer,
            is_start=(index == 0),
            is_final=(index == last_index),
        )
        for index, monomer in enumerate(monomer_list)
    ]
# def find_bacteria(monomers: list[str]) -> list[str]:
# """
# Finds bacteria which might produce the target peptide.
# Input: sequence of possible domains. Each domain is represented as a protein sequence.
# Output: possible hits from blastp search.
# """
# return []
# def letter_counter(word, letter):
# """Count the occurrences of a specific letter in a word.
# Args:
# word: The word or phrase to analyze
# letter: The letter to count occurrences of
# Returns:
# The number of times the letter appears in the word
# """
# return word.lower().count(letter.lower())
def convert_to_fasta(modules_list):
    """Render the domains of all modules as FASTA-formatted text.

    Each domain contributes two lines: a header of the form
    ``>module_<MM>_domain_<DD>_<name>`` (zero-based, two-digit indices) and
    the domain's sequence.

    Args:
        modules_list: Iterable of modules; each module is an iterable of
            mappings that provide the ``'name'`` and ``'sequence'`` keys.

    Returns:
        The FASTA lines joined by newlines, without a trailing newline;
        an empty string when there are no domains.
    """
    def _fasta_lines():
        # Emit header and sequence lines in module order, then domain order.
        for module_idx, domains in enumerate(modules_list):
            for domain_idx, domain in enumerate(domains):
                label = domain['name']
                residues = domain['sequence']
                yield f">module_{module_idx:02d}_domain_{domain_idx:02d}_{label}"
                yield residues

    return '\n'.join(_fasta_lines())
def generate_peptide_monomers(num_monomers: int):
    """Generates a peptide built from the requested number of monomer fragments.

    The monomers are currently picked at random from the collection returned by
    data_utils.load_monomers().

    Args:
        num_monomers: The number of monomer fragments in the resulting 'peptide'

    Returns:
        A pair of strings: the chosen monomers joined by commas, and the
        corresponding domain sequences in .fasta format (intended for later
        blastp searches).
    """
    vocabulary = data_utils.load_monomers()
    chosen = generate_monomers(num_monomers, vocabulary)
    modules = monomers2modules(chosen)
    peptide = ",".join(chosen)
    fasta_text = convert_to_fasta(modules)
    return peptide, fasta_text
if __name__ == "__main__":
    # Static texts shown at the top of the page (kept verbatim).
    intro_markdown = """# BioCynthia
```
There are bacteria in soil and sea
They have what is called a B-G-C
These genes produce some complex peptides
And they might save our lives!
```
"""
    readme_pointer = "For more details on project goals and motivation, please refer to the README.md"

    with gr.Blocks(title="NRPS domains 'generator'") as demo:
        gr.Markdown(intro_markdown)
        gr.Markdown(readme_pointer)

        # Input control: how many monomers the generated peptide should have.
        monomer_count_slider = gr.Slider(
            label="Number of monomers in the target peptide",
            minimum=2,
            maximum=10,
            step=1,
            value=3,
        )

        # Wire the slider to the generator; both outputs are plain text
        # (the comma-separated monomers and the FASTA text).
        gr.Interface(
            fn=generate_peptide_monomers,
            inputs=[monomer_count_slider],
            outputs=["text", "text"],
        )

    # NOTE(review): launched after the Blocks context is closed, following the
    # usual Gradio pattern - confirm against the original file's indentation.
    # mcp_server=True asks Gradio to start its MCP server alongside the web UI.
    demo.launch(mcp_server=True)
|