Spaces:

Agents-MCP-Hackathon
/

biocynthia-demo

Sleeping

File size: 4,400 Bytes

086ca7b
dd2b907
a713c85
 
086ca7b
dd2b907
 
35eb5e1
 
 
 
 
 
 
 
dd2b907
35eb5e1
 
 
dd2b907
35eb5e1
 
086ca7b
35eb5e1
 
 
 
086ca7b
e3148e0
35eb5e1
 
086ca7b
35eb5e1
086ca7b
35eb5e1
086ca7b
 
 
 
 
 
 
35eb5e1
 
e3148e0
 
 
 
 
 
 
35eb5e1
 
e3148e0
 
00539a5
e3148e0
 
 
00539a5
e3148e0
 
 
 
 
717cff1
 
 
 
 
 
 
 
 
 
 
 
 
 
e3148e0
 
 
 
 
 
 
 
00539a5
568fe9a
ca699c2
717cff1
00539a5
e3148e0
 
 
717cff1
 
 
 
e3148e0
 
 
 
a713c85
e3148e0
 
 
 
 
 
 
00539a5
e3148e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
717cff1
e3148e0

from typing import List, Dict
import numpy as np
import gradio as gr

import data_utils



def smiles2monomers(smiles: str) -> list[str]:
    """
    Placeholder for splitting a molecule, given as a SMILES string, into its monomers
    (each monomer is meant to be a SMILES string as well).

    The decomposition is not implemented yet: the argument is ignored and an empty
    list is always returned.
    """
    # Stub: no monomers are produced for any input.
    return list()


def generate_monomers(num_monomers: int, monomers_vocab: List[str]) -> list[str]:
    """
    Draws a random sequence of monomers from the given vocabulary.

    Sampling relies on the defaults of numpy.random.choice (uniform, with replacement),
    so the same monomer may appear several times in the result.

    Args:
        num_monomers: Number of monomers to draw.
        monomers_vocab: Candidate monomer names to sample from.

    Returns:
        A plain Python list with num_monomers entries taken from monomers_vocab.

    Raises:
        ValueError: Raised by numpy when monomers_vocab is empty and num_monomers is positive.
    """
    # np.random.choice returns a numpy array of numpy.str_ values; convert it so that
    # the result really is the list[str] promised by the signature.
    return np.random.choice(monomers_vocab, num_monomers).tolist()


def monomer2domains(monomer: str, is_start=False, is_final=False) -> list[str]:
    """
    Returns the module suggested for a single monomer.

    The lookup is delegated to data_utils.module_generator.suggest_module and its
    result is passed back unchanged.

    Args:
        monomer: The monomer to find a module for.
        is_start: Forwarded to suggest_module; set for the first monomer of a chain.
        is_final: Forwarded to suggest_module; set for the last monomer of a chain.

    NOTE(review): the return annotation says list[str], but convert_to_fasta in this
    file reads 'name' and 'sequence' keys from every element - confirm the real type.
    """
    return data_utils.module_generator.suggest_module(
        monomer,
        is_start=is_start,
        is_final=is_final,
    )


def monomers2modules(monomer_list: list[str], is_cyclic: bool=False) -> List[List[Dict[str, str]]]:
    """
    Converts a sequence of monomers to a list of modules, one module per monomer.

    Each monomer is passed to monomer2domains together with flags telling whether it
    is the first and/or the last element of the chain. For a single-monomer input both
    flags are set.

    Cyclicity flag is always ignored, since I haven't figured out yet how this is
    encoded in NRPS domains sequence.

    Args:
        monomer_list: Monomers in the order they appear in the peptide.
        is_cyclic: Currently unused.

    Returns:
        The results of monomer2domains, in the same order as monomer_list.
        An empty input gives an empty list.
    """
    # enumerate() yields indices 0 .. len - 1, so the final monomer sits at len - 1.
    # Comparing against len(monomer_list) itself could never match, which left the
    # last module unmarked.
    last_index = len(monomer_list) - 1
    return [
        monomer2domains(monomer, is_start=(index == 0), is_final=(index == last_index))
        for index, monomer in enumerate(monomer_list)
    ]


# def find_bacteria(monomers: list[str]) -> list[str]:
#     """
#     Finds bacteria which might produce the target peptide.
#     Input: sequence of possible domains. Each domain is represented as a protein sequence.
#     Output: possible hits from blastp search.
#     """
#     return []


# def letter_counter(word, letter):
#     """Count the occurrences of a specific letter in a word.
    
#     Args:
#         word: The word or phrase to analyze
#         letter: The letter to count occurrences of
        
#     Returns:
#         The number of times the letter appears in the word
#     """
#     return word.lower().count(letter.lower())

def convert_to_fasta(modules_list):
    """
    Renders the domains of all modules as text in FASTA layout.

    Every domain contributes two lines: a header of the form
    '>module_XX_domain_YY_<name>' (zero-padded, zero-based positions of the module and
    of the domain inside it) followed by the domain's 'sequence' value.

    Args:
        modules_list: Iterable of modules; each module is an iterable of mappings
            providing the 'name' and 'sequence' keys.

    Returns:
        All lines joined with newlines, without a trailing newline. Empty input
        gives an empty string.
    """
    lines = [
        line
        for module_pos, domains in enumerate(modules_list)
        for domain_pos, domain in enumerate(domains)
        for line in (
            f">module_{module_pos:02d}_domain_{domain_pos:02d}_{domain['name']}",
            domain['sequence'],
        )
    ]
    return '\n'.join(lines)


def generate_peptide_monomers(num_monomers: int):
    """Generates a 'peptide' built from the requested number of monomer fragments.
    For now the monomers are drawn at random from a predefined collection (amino acids and their D- isomers).
    
    Args:
        num_monomers: The number of monomer fragments in the resulting 'peptide'
    
    Returns:
        A string with the selected monomer fragments separated by commas,
        and the data for the corresponding domain sequences in .fasta format (for future searches with blastp).
        
    """
    # Vocabulary of monomer names as provided by data_utils.
    vocabulary = data_utils.load_monomers()
    chosen = generate_monomers(num_monomers, vocabulary)
    modules = monomers2modules(chosen)

    peptide = ",".join(chosen)
    fasta_text = convert_to_fasta(modules)
    return peptide, fasta_text



if __name__ == "__main__":
    # Assemble the demo page: a title with a short verse, a pointer to the README,
    # and an interface that feeds the slider value to generate_peptide_monomers and
    # shows its two text results.
    with gr.Blocks(title="NRPS domains 'generator'") as demo:
        gr.Markdown("""# BioCynthia
```
There are bacteria in soil and sea
They have what is called a B-G-C
These genes produce some complex peptides
And they might save our lives!
```
                    """)

        gr.Markdown("For more details on project goals and motivation, please refer to the README.md")

        # The slider is the single input of the generator function.
        monomer_count_slider = gr.Slider(
            label="Number of monomers in the target peptide",
            minimum=2,
            maximum=10,
            value=3,
            step=1,
        )
        gr.Interface(
            fn=generate_peptide_monomers,
            inputs=[monomer_count_slider],
            outputs=["text", "text"],
        )

    # Start the app once the layout is complete; mcp_server=True asks Gradio to
    # serve it as an MCP server as well.
    demo.launch(mcp_server=True)