# Container image for the FastAPI application, served by Gunicorn with a
# Uvicorn worker on port 7860.

# Use the slim variant of the official Python 3.12 image rather than the full
# python:3.12 image to keep the final image small.
FROM python:3.12-slim

# Set the working directory; relative paths in later instructions resolve
# against /app.
WORKDIR /app

# Copy only the dependency manifest first so the pip layer below is rebuilt
# when requirements.txt changes, not on every source edit.
# NOTE(review): an earlier comment here stated that llama-cpp-python is
# "already included in the base image". The base is python:3.12-slim, which
# presumably does not bundle it -- confirm it is listed in requirements.txt
# or switch to a base image that really provides it.
COPY ./requirements.txt /app/requirements.txt

# Install the project dependencies; --no-cache-dir keeps pip's download cache
# out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt

# Copy the entire application source code into the image.
COPY . /app

# Document the port the server binds to (the port Hugging Face Spaces
# expects, per the original comment). EXPOSE does not publish the port.
EXPOSE 7860

# NOTE(review): no USER instruction is set, so the server runs as root.
# Consider creating an unprivileged user and switching to it once it is
# confirmed which paths the application writes to at runtime.

# Run FastAPI under Gunicorn with a single Uvicorn worker. The worker and
# graceful-shutdown timeouts are raised to 600 seconds to allow for model
# loading.
CMD ["gunicorn", "app.main:app", \
     "-k", "uvicorn.workers.UvicornWorker", \
     "--bind", "0.0.0.0:7860", \
     "--workers", "1", \
     "--timeout", "600", \
     "--graceful-timeout", "600", \
     "--log-level", "info"]