Sarisha Das committed on
Commit
dfdc8a2
·
1 Parent(s): e47665d

rename all folders, fix docker path

Browse files
Dockerfile CHANGED
@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y \
10
 
11
  COPY requirements.txt ./
12
  COPY src/ ./src/
13
- COPY utils/ ./utils/
14
 
15
  RUN pip3 install -r requirements.txt
16
 
@@ -18,4 +18,4 @@ EXPOSE 8501
18
 
19
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
 
21
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
10
 
11
  COPY requirements.txt ./
12
  COPY src/ ./src/
13
+ COPY app/ ./app/
14
 
15
  RUN pip3 install -r requirements.txt
16
 
 
18
 
19
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
 
21
+ ENTRYPOINT ["streamlit", "run", "app/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
src/streamlit_app.py β†’ app/app.py RENAMED
@@ -5,13 +5,12 @@ from pathlib import Path
5
  import streamlit as st
6
 
7
  # ─── Repo root is the working directory on HF Spaces ─────────────────────────
8
- ROOT = Path(__file__).resolve().parent.parent # app.py lives at repo root
9
- sys.path.append(str(ROOT))
10
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
11
 
12
- os.environ["HF_HOME"] = str(ROOT / ".hf_cache")
13
- os.environ["TRANSFORMERS_CACHE"] = str(ROOT / ".hf_cache" / "transformers")
14
 
 
 
 
15
  from utils.retrieval_helpers import enrich_search_results, enrich_bm25_search_results
16
  from utils.bm25 import load
17
  from utils.semantic import load_vector_store
@@ -56,6 +55,7 @@ def load_hf_dataset():
56
  return load_dataset(
57
  "McAuley-Lab/Amazon-Reviews-2023",
58
  "raw_meta_Grocery_and_Gourmet_Food",
 
59
  trust_remote_code=True,
60
  token=HF_TOKEN
61
  )
@@ -74,7 +74,7 @@ def load_vector_store_cached():
74
  token=HF_TOKEN,
75
  )
76
 
77
- mini_index_path = Path(snapshot_path) / "tokenisation" / "bm25_index.pkl"
78
  embeddings_dir = Path(snapshot_path) / "embeddings"
79
 
80
  vector_store = load_vector_store(embeddings_dir)
@@ -98,7 +98,7 @@ HF_DATASET = load_hf_dataset()
98
  if data_source == 'local':
99
  MINI_INDEX_PATH = ROOT / "data" / "processed" / "tokenisation" / "bm25_index_mini.pkl"
100
 
101
- vector_store = load_vector_store(ROOT_FOLDER / 'data' / 'processed' / 'embeddings')
102
  retriever = load(MINI_INDEX_PATH)
103
  else:
104
 
 
5
  import streamlit as st
6
 
7
  # ─── Repo root is the working directory on HF Spaces ─────────────────────────
8
+ ROOT = Path(__file__).resolve().parent.parent
 
 
9
 
 
 
10
 
11
+ # os.environ["HF_HOME"] = str("./.hf_cache")
12
+ sys.path.append(str(ROOT))
13
+ sys.path.insert(0, str(ROOT / "utils"))
14
  from utils.retrieval_helpers import enrich_search_results, enrich_bm25_search_results
15
  from utils.bm25 import load
16
  from utils.semantic import load_vector_store
 
55
  return load_dataset(
56
  "McAuley-Lab/Amazon-Reviews-2023",
57
  "raw_meta_Grocery_and_Gourmet_Food",
58
+ split='full',
59
  trust_remote_code=True,
60
  token=HF_TOKEN
61
  )
 
74
  token=HF_TOKEN,
75
  )
76
 
77
+ mini_index_path = Path(snapshot_path) / "tokenisation" / "bm25_index_mini.pkl"
78
  embeddings_dir = Path(snapshot_path) / "embeddings"
79
 
80
  vector_store = load_vector_store(embeddings_dir)
 
98
  if data_source == 'local':
99
  MINI_INDEX_PATH = ROOT / "data" / "processed" / "tokenisation" / "bm25_index_mini.pkl"
100
 
101
+ vector_store = load_vector_store(ROOT / 'data' / 'processed' / 'embeddings')
102
  retriever = load(MINI_INDEX_PATH)
103
  else:
104
 
{src β†’ app}/styles.css RENAMED
File without changes
{utils β†’ src}/bm25.py RENAMED
File without changes
{utils β†’ src}/eda_helpers.py RENAMED
File without changes
{utils β†’ src}/helpers.py RENAMED
File without changes
{utils β†’ src}/hybrid.py RENAMED
File without changes
{utils β†’ src}/rag_pipeline.py RENAMED
File without changes
{utils β†’ src}/retrieval_helpers.py RENAMED
File without changes
{utils β†’ src}/semantic.py RENAMED
File without changes