Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from dotenv import load_dotenv | |
| import boto3 | |
| import os | |
| import uvicorn | |
| import logging | |
| from uuid import uuid4 | |
| from pydantic import BaseModel | |
| from helper import PdfToSectionConverter | |
# Load environment variables before anything reads them.
load_dotenv()

# Module-wide logging at INFO level, with a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# AWS settings come from the process environment.
s3_access_key_id = os.environ.get("S3_ACCESS_KEY_ID")
s3_secret_key = os.environ.get("S3_SECRET_KEY")
aws_region = os.environ.get("AWS_REGION")

# Fail fast at import time when any of the three settings is missing or empty.
if not (s3_access_key_id and s3_secret_key and aws_region):
    logger.error("Missing AWS S3 credentials in environment variables.")
    raise ValueError("AWS credentials not set properly.")

# The FastAPI application object.
app = FastAPI()

# A single S3 client, built once with the credentials validated above.
s3_client = boto3.client(
    "s3",
    region_name=aws_region,
    aws_access_key_id=s3_access_key_id,
    aws_secret_access_key=s3_secret_key,
)
class PdfRequest(BaseModel):
    """Request payload describing a single PDF conversion job."""

    # Location of the source PDF; convert_pdf rejects values that do not
    # start with "s3://".
    s3_file_path: str
    # Forwarded to the converter as ``file_title``.
    file_title: str
    # Forwarded to the converter as ``doc_id``.
    doc_id: str
    # Page bounds forwarded to the converter as ``start_page_no`` /
    # ``end_page_no``; both default to 0.
    # NOTE(review): what 0 means (e.g. "whole document") is decided by
    # PdfToSectionConverter -- confirm there.
    start_page: int = 0
    end_page: int = 0
async def start():
    """Return a fixed readiness message for the parser API."""
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file -- confirm how it is registered on ``app``.
    return dict(message="Parser API is Ready")
async def convert_pdf(request: PdfRequest):
    """Download a PDF from S3, convert it into sections, and return the result.

    The object named by ``request.s3_file_path`` is downloaded to a uniquely
    named temporary file under ``/tmp``, passed to
    ``PdfToSectionConverter.convert`` and the temporary file is removed
    afterwards, whether or not the conversion succeeded.

    Args:
        request: The S3 location of the PDF (``s3://<bucket>/<key>``), its
            title and document id, and the page bounds forwarded to the
            converter.

    Returns:
        ``{"status": "success", "data": <converter output>}``.

    Raises:
        HTTPException: 400 when the S3 path is malformed (wrong scheme, or
            empty bucket/key); 500 when the download fails or any other
            unexpected error occurs.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file -- confirm how it is registered on ``app``.
    # NOTE(review): the download and the conversion are called without
    # ``await`` inside an ``async def``; if they are blocking, the event loop
    # is held for the duration of the request -- confirm this is acceptable.
    s3_scheme = "s3://"
    output_dir = "/tmp"

    try:
        # Validate S3 file path
        if not request.s3_file_path.startswith(s3_scheme):
            raise HTTPException(status_code=400, detail="Invalid S3 file path. Must start with 's3://'")

        # Strip only the leading scheme: str.replace would also delete any
        # later "s3://" that happens to appear inside the object key.
        bucket_name, _, object_key = request.s3_file_path[len(s3_scheme):].partition("/")
        if not bucket_name or not object_key:
            raise HTTPException(status_code=400, detail="Invalid S3 file path format.")

        # Ensure the directory exists
        os.makedirs(output_dir, exist_ok=True)

        # One file per request, so overlapping conversions (e.g. several
        # worker processes) never overwrite or delete each other's download.
        output_path = os.path.join(output_dir, f"{uuid4().hex}.pdf")

        logger.info(f"Downloading {request.s3_file_path} from S3 bucket {bucket_name}...")

        try:
            # Download PDF from S3
            try:
                s3_client.download_file(bucket_name, object_key, output_path)
            except Exception as e:
                logger.exception(f"Failed to download file from S3: {str(e)}")
                raise HTTPException(status_code=500, detail="Error downloading file from S3.") from e

            # Initialize and run the converter
            converter = PdfToSectionConverter()
            output = converter.convert(
                downloaded_pdf_path=output_path,
                file_title=request.file_title,
                doc_id=request.doc_id,
                start_page_no=request.start_page,
                end_page_no=request.end_page,
            )
        finally:
            # Cleanup the temporary file on success and on failure alike.
            try:
                os.remove(output_path)
            except FileNotFoundError:
                # Nothing was written (e.g. the download itself failed).
                pass

        return {"status": "success", "data": output}

    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as e:
        logger.exception(f"Unexpected error: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal Server Error.") from e
def start_server(host: str = "0.0.0.0", port: int = 8000, reload: bool = True) -> None:
    """Run the API under uvicorn.

    Called with no arguments this behaves exactly as before: bind to
    ``0.0.0.0:8000`` with auto-reload enabled.

    Args:
        host: Address to bind to.
        port: TCP port to listen on.
        reload: Passed through to ``uvicorn.run`` as ``reload``.
    """
    logger.info("Starting Server...")
    # The application is given as an import string ("module:attribute")
    # rather than as the object itself.
    # NOTE(review): "app:app" presumes this module is importable as ``app``
    # (i.e. the file is app.py) -- confirm.
    uvicorn.run("app:app", host=host, port=port, reload=reload)


if __name__ == "__main__":
    start_server()