Spaces:

aditya2001
/

VidSimplify

Sleeping

VidSimplify / manimator /inputs /processor.py

Adityahulk

Restoring repo state for deployment

6fc3143 16 days ago

2.82 kB

	import logging
	from pathlib import Path
	from typing import Union
	from .pdf_parser import PDFParser
	from .url_parser import URLParser

	logger = logging.getLogger(__name__)

	class InputProcessor:
	    """
	    Main entry point for processing different types of inputs.

	    Dispatches on an input-type tag and returns text: plain text is passed
	    through unchanged, PDFs are handed to ``PDFParser.parse`` and URLs to
	    ``URLParser.parse``.
	    """

	    @staticmethod
	    def process(input_type: str, input_data: Union[str, bytes]) -> str:
	        """
	        Process input based on type.

	        Args:
	            input_type: 'text', 'pdf', or 'url'
	            input_data: The actual text, a URL, or - for 'pdf' - a path to
	                an existing file, a base64 string (optionally prefixed with
	                a "data:application/pdf;base64," header) or raw PDF bytes

	        Returns:
	            Extracted text content ('text' input is returned unchanged)

	        Raises:
	            ValueError: If input_type is not supported, or if a non-path
	                PDF payload cannot be decoded or parsed.
	        """
	        logger.info("Processing input type: %s", input_type)

	        if input_type == 'text':
	            return input_data
	        if input_type == 'pdf':
	            return InputProcessor._process_pdf(input_data)
	        if input_type == 'url':
	            return URLParser.parse(input_data)

	        raise ValueError(f"Unsupported input type: {input_type}")

	    @staticmethod
	    def _is_existing_path(input_data) -> bool:
	        """Return True if input_data is short and names an existing path."""
	        # NOTE(review): if input_data can come from an untrusted client,
	        # probing it as a server-side path lets that client point the parser
	        # at any existing file - confirm callers only pass trusted paths.
	        try:
	            # Only check if it looks like a path (not too long); a base64
	            # payload is normally far longer than this.
	            return len(str(input_data)) < 256 and Path(input_data).exists()
	        except (TypeError, ValueError, OSError):
	            # bytes payloads (TypeError), embedded NULs (ValueError) and
	            # OS-level lookup failures all mean "not a usable path".
	            return False

	    @staticmethod
	    def _process_pdf(input_data: Union[str, bytes]) -> str:
	        """Parse a PDF given as an existing path, base64 text or raw bytes."""
	        if InputProcessor._is_existing_path(input_data):
	            return PDFParser.parse(input_data)

	        import base64
	        import os
	        import tempfile

	        tmp_path = None
	        try:
	            # Decode BEFORE creating the temp file so that a malformed
	            # payload cannot leave an orphaned file behind.
	            if isinstance(input_data, bytes):
	                pdf_bytes = input_data
	            else:
	                # Handle potential header "data:application/pdf;base64,".
	                # Split once: everything after the first comma is payload.
	                if "," in input_data:
	                    input_data = input_data.split(",", 1)[1]
	                pdf_bytes = base64.b64decode(input_data)

	            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
	                # Record the name first so the finally clause can remove the
	                # file even if the write itself fails.
	                tmp_path = tmp.name
	                tmp.write(pdf_bytes)

	            logger.info("Saved base64 PDF to temporary file: %s", tmp_path)
	            return PDFParser.parse(tmp_path)
	        except Exception as e:
	            # Boundary handler: every decode/write/parse failure is reported
	            # to callers as ValueError, with the original cause chained.
	            logger.error("Failed to process PDF input: %s", e)
	            raise ValueError(f"Invalid PDF input: {e}") from e
	        finally:
	            # Cleanup temp file (best effort; a failure is only logged).
	            if tmp_path is not None:
	                try:
	                    os.unlink(tmp_path)
	                except OSError as e:
	                    logger.warning("Failed to delete temp PDF file: %s", e)