import os
from pathlib import Path

import requests

from config import DEFAULT_API_URL, CACHE_DIR


def _filename_from_content_disposition(content_disposition: str) -> str | None:
    """Return a safe bare filename parsed from a Content-Disposition value.

    Args:
        content_disposition: Raw header value, e.g. ``attachment; filename="a.csv"``.

    Returns:
        The final path component of the advertised filename, or None when the
        header carries no usable ``filename=`` parameter.
    """
    _, marker, remainder = content_disposition.partition('filename=')
    if not marker:
        return None

    remainder = remainder.strip()
    if remainder.startswith('"'):
        # Quoted value: everything up to the closing quote is the name.
        raw_name = remainder[1:].split('"', 1)[0]
    else:
        # Unquoted value: stop at the next header parameter, if any.
        raw_name = remainder.split(';', 1)[0].strip()

    # The header is server-controlled. Keep only the last path component
    # (treating backslashes as separators too) so a value such as
    # "../../etc/passwd" cannot escape the target directory.
    candidate = os.path.basename(raw_name.replace('\\', '/'))
    if candidate in ('', '.', '..'):
        return None
    return candidate


def _with_extension_for_content_type(filename: str, content_type: str) -> str:
    """Append an extension implied by the content type if not already present.

    Args:
        filename: Current filename.
        content_type: Value of the Content-Type header (may be empty).

    Returns:
        The filename, with ``.json``, ``.csv`` or ``.txt`` appended when the
        content type suggests it and the name does not already end with it.
    """
    content_type = content_type.lower()
    # 'csv' must be tested before 'text': "text/csv" contains both, and the
    # more specific extension should win.
    for marker, extension in (('json', '.json'), ('csv', '.csv'), ('text', '.txt')):
        if marker in content_type:
            if not filename.endswith(extension):
                filename += extension
            break
    return filename


def fetch_task_file(task_id: str, working_dir: str | None = None) -> str | None:
    """
    Fetch the file associated with a task_id from the API and save it to the
    working directory.

    Args:
        task_id: The task ID to fetch the file for
        working_dir: The working directory to save the file to. If None, uses
            the global cache directory. The directory is created if missing.

    Returns:
        The path to the downloaded file, or None if no file exists or an
        error occurred
    """
    # Use cache directory if working_dir is not provided
    if working_dir is None:
        working_dir = str(CACHE_DIR)

    try:
        files_url = f"{DEFAULT_API_URL}/files/{task_id}"
        response = requests.get(files_url, timeout=30)

        if response.status_code == 404:
            # No file associated with this task
            return None

        if response.status_code != 200:
            # Raises for 4xx/5xx; other statuses (e.g. 204) carry no file.
            response.raise_for_status()
            return None

        # Prefer the server-advertised name, falling back to a generated one.
        filename = _filename_from_content_disposition(
            response.headers.get('content-disposition', '')
        ) or f"task_{task_id}_file"
        filename = _with_extension_for_content_type(
            filename, response.headers.get('content-type', '')
        )

        # Save file to working directory, creating it on first use.
        Path(working_dir).mkdir(parents=True, exist_ok=True)
        file_path = os.path.join(working_dir, filename)
        with open(file_path, 'wb') as f:
            f.write(response.content)

        print(f"Downloaded file for task {task_id}: {file_path}")
        return file_path

    except Exception as e:
        # Deliberate best-effort boundary: callers expect None on any failure
        # (network error, HTTP error status, filesystem error).
        print(f"Error fetching file for task {task_id}: {e}")
        return None


def extract_task_id_from_question_data(question_data: dict) -> str | None:
    """
    Extract task_id from question data dictionary.

    Args:
        question_data: Dictionary containing question information

    Returns:
        The task_id if found, None otherwise
    """
    return question_data.get("task_id")