abtsousa
Implement cache directory management and update file fetching to use cache if no working directory is provided
e8c805a
import os
import requests
from pathlib import Path
from config import DEFAULT_API_URL, CACHE_DIR
def _filename_from_content_disposition(header_value: str) -> str | None:
    """Return the bare file name from a Content-Disposition header value, or None."""
    for param in header_value.split(';'):
        key, sep, value = param.partition('=')
        # Skip the disposition type ("attachment") and every parameter other
        # than plain ``filename`` (``filename*`` is not interpreted here).
        if not sep or key.strip().lower() != 'filename':
            continue
        candidate = value.strip().strip('"').strip()
        # The header is controlled by the server: drop any directory part so
        # the name cannot point outside the target directory.
        candidate = os.path.basename(candidate.replace('\\', '/'))
        if candidate and candidate not in ('.', '..'):
            return candidate
    return None


def _extension_for_content_type(content_type: str) -> str:
    """Map a Content-Type header value to a file extension, or '' if unknown."""
    content_type = content_type.lower()
    if 'json' in content_type:
        return '.json'
    # 'csv' has to be tested before 'text': the usual CSV type is "text/csv",
    # which would otherwise be labelled '.txt'.
    if 'csv' in content_type:
        return '.csv'
    if 'text' in content_type:
        return '.txt'
    return ''


def fetch_task_file(task_id: str, working_dir: str | None = None) -> str | None:
    """
    Fetch the file associated with a task_id from the API and save it locally.

    The file name comes from the ``filename`` parameter of the response's
    Content-Disposition header when present (reduced to its final path
    component), otherwise it defaults to ``task_<task_id>_file``. If the chosen
    name has no extension, one is guessed from the Content-Type header
    (.json, .csv or .txt).

    Args:
        task_id: The task ID to fetch the file for
        working_dir: The directory to save the file to; it is created if it
            does not exist. If None, uses the global cache directory.

    Returns:
        The path to the downloaded file, or None if no file exists (HTTP 404),
        the response status was not 200, or an error occurred.
    """
    # Use cache directory if working_dir is not provided
    if working_dir is None:
        working_dir = str(CACHE_DIR)

    try:
        files_url = f"{DEFAULT_API_URL}/files/{task_id}"
        response = requests.get(files_url, timeout=30)

        if response.status_code == 404:
            # No file associated with this task
            return None

        if response.status_code != 200:
            # Raises for 4xx/5xx (reported by the handler below).
            response.raise_for_status()
            # Any other status carries no file to save.
            return None

        server_name = _filename_from_content_disposition(
            response.headers.get('content-disposition', '')
        )
        extension = _extension_for_content_type(
            response.headers.get('content-type', '')
        )
        if server_name and os.path.splitext(server_name)[1]:
            # Keep a server-provided extension instead of stacking a guess
            # on top of it (e.g. "script.py" must not become "script.py.txt").
            filename = server_name
        else:
            filename = (server_name or f"task_{task_id}_file") + extension

        # Save file to working directory, creating it on first use
        os.makedirs(working_dir, exist_ok=True)
        file_path = os.path.join(working_dir, filename)
        with open(file_path, 'wb') as f:
            f.write(response.content)

        print(f"Downloaded file for task {task_id}: {file_path}")
        return file_path
    except Exception as e:
        # Deliberate best-effort boundary: any failure is reported and mapped
        # to None so callers can treat the task as having no file.
        print(f"Error fetching file for task {task_id}: {e}")
        return None
def extract_task_id_from_question_data(question_data: dict) -> str | None:
"""
Extract task_id from question data dictionary.
Args:
question_data: Dictionary containing question information
Returns:
The task_id if found, None otherwise
"""
return question_data.get("task_id")