File size: 3,094 Bytes
372a807
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import streamlit as st
from typing import List, IO

# Helper functions from the project-local utils module
from utils import (
    get_pdf_text,
    get_docx_text,
    get_text_chunks,
    get_vector_store,
    user_input,
)

# ---------------------------------------------------------------------------#
#  Main Streamlit application
# ---------------------------------------------------------------------------#
def main() -> None:
    """Render the Docosphere page: Q&A on the left, document upload on the right.

    The right column is rendered first, so when "Process Documents" is
    clicked the uploaded files are handled before any pending question is
    passed to ``user_input`` in the same run.
    """
    # ----- Page configuration ------------------------------------------------
    st.set_page_config(
        page_title="Docosphere",
        page_icon="📄",
        layout="wide",
    )

    st.title("📄 Docosphere")
    st.markdown("*Where Documents Come Alive …*")

    # Two-column layout: Q&A on left, file upload on right
    col_left, col_right = st.columns([2, 1])

    # --------------------- Right column – document upload -------------------
    with col_right:
        st.markdown("### 📁 Document Upload")
        uploaded_files: List[IO[bytes]] = st.file_uploader(
            "Upload PDF or Word files",
            accept_multiple_files=True,
            type=["pdf", "docx"],
            help="You can select multiple files at once.",
        )

        if st.button("🚀 Process Documents"):
            # Handled in a helper so that its early exit on an empty upload
            # returns only from the helper. Returning from main() here would
            # skip the expander below and the whole question column.
            _process_uploads(uploaded_files)

        with st.expander("ℹ️ How to use"):
            st.markdown(
                """
                1. Upload one or more **PDF** or **Word** documents.\n
                2. Click **Process Documents** to build the knowledge index.\n
                3. Ask natural-language questions in the input box (left column).\n
                4. The assistant will either answer from its own model knowledge or
                   retrieve context from your documents when needed.
                """
            )

    # ---------------------- Left column – chat interface --------------------
    with col_left:
        st.markdown("### 💬 Ask Your Question")
        # NOTE(review): the label is intentionally empty here; Streamlit
        # discourages empty widget labels. Consider a real label combined
        # with label_visibility="collapsed" if the installed Streamlit
        # version supports it.
        question: str = st.text_input(
            "",
            placeholder="Type a question about your documents or general topics…",
        )

        if question:
            user_input(question)


def _process_uploads(uploaded_files: List[IO[bytes]]) -> None:
    """Extract text from the uploaded files and hand the chunks to the vector store.

    Args:
        uploaded_files: Files returned by ``st.file_uploader``. An empty or
            missing selection produces a warning and nothing else happens.

    Files are routed by file-name extension: ``.pdf`` files go to
    ``get_pdf_text`` and ``.docx`` files go to ``get_docx_text``. The results
    are concatenated (PDF text first) before being chunked.
    """
    if not uploaded_files:
        st.warning("📋 Please upload at least one file first.")
        return

    with st.spinner("🔄 Extracting text & creating vector index…"):
        # Case-insensitive extension match, so "REPORT.PDF" is routed too.
        pdfs = [f for f in uploaded_files if f.name.lower().endswith(".pdf")]
        docs = [f for f in uploaded_files if f.name.lower().endswith(".docx")]

        parts = []
        if pdfs:
            parts.append(get_pdf_text(pdfs))
        if docs:
            parts.append(get_docx_text(docs))
        combined_text = "".join(parts)

        # Whitespace-only extraction results count as "no text".
        if combined_text.strip():
            chunks = get_text_chunks(combined_text)
            get_vector_store(chunks)
            st.success("✅ Documents processed! Ask away in the left panel.")
        else:
            st.warning("⚠️ No readable text found in the uploaded files.")

# Entry-point guard: build the page only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()