Spaces:

zwt963
/

paperindex

Sleeping

paperindex / test /test_evaluation.py

DVampire

update website

78f6650 4 months ago

5.94 kB

	#!/usr/bin/env python3
	"""
	Test script: Verify that the run_evaluation function works correctly
	"""

	import asyncio
	import os
	import sys
	from pathlib import Path
	from dotenv import load_dotenv
	import argparse
	from mmengine import DictAction

	# Load environment variables
	load_dotenv(verbose=True)

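	# Resolve the project root (the parent of this file's directory) and add it to sys.path so the `src` package can be imported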
	root = str(Path(__file__).resolve().parents[1])
	sys.path.append(root)

	from src.database import db
	from src.logger import logger
	from src.config import config
	from src.agents.evaluator import run_evaluation


	def parse_args():
	    """Build the command-line parser for this test script and parse the arguments.

	    Two options are defined:

	    * ``--config``: path to the config file; defaults to
	      ``configs/paper_agent.py`` under the project root.
	    * ``--cfg-options``: one or more ``key=value`` overrides, handled by
	      ``DictAction``.

	    Returns:
	        argparse.Namespace: the parsed arguments.
	    """
	    default_config_path = os.path.join(root, "configs", "paper_agent.py")

	    # Help text kept as adjacent literals so the rendered message is unchanged.
	    cfg_options_help = (
	        'override some settings in the used config, the key-value pair '
	        'in xxx=yyy format will be merged into config file. If the value to '
	        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
	        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
	        'Note that the quotation marks are necessary and that no white space '
	        'is allowed.'
	    )

	    cli = argparse.ArgumentParser(description='main')
	    cli.add_argument("--config", default=default_config_path, help="config file path")
	    cli.add_argument(
	        '--cfg-options',
	        nargs='+',
	        action=DictAction,
	        help=cfg_options_help,
	    )
	    return cli.parse_args()


	async def test_evaluation():
	    """Run one end-to-end evaluation of a fixed arXiv paper and report the outcome.

	    Steps: check that ``ANTHROPIC_API_KEY`` is set, make sure the test paper has
	    a database row (inserting a placeholder record if it does not), call
	    ``run_evaluation`` on the paper's PDF URL, then re-read the row to confirm
	    that the evaluation was persisted.

	    Returns:
	        bool: ``True`` when the evaluation ran and the database row is flagged
	        ``is_evaluated``; ``False`` when the API key is missing, the evaluation
	        was not saved, or any exception was raised (its traceback is printed).
	    """
	    print("=== Starting Evaluation Test ===")

	    # Test parameters
	    test_arxiv_id = "2508.09889"  # expected to exist already; a placeholder row is inserted otherwise
	    test_pdf_url = f"https://arxiv.org/pdf/{test_arxiv_id}.pdf"

	    print(f"Test paper ID: {test_arxiv_id}")
	    print(f"PDF URL: {test_pdf_url}")

	    # Check API key
	    api_key = os.getenv("ANTHROPIC_API_KEY")
	    if not api_key:
	        print("❌ Error: ANTHROPIC_API_KEY environment variable not found")
	        return False

	    # Never echo any part of the secret: console output tends to end up in logs.
	    print(f"✅ API key is set ({len(api_key)} characters, value hidden)")

	    try:
	        # Check if paper exists in database
	        paper = await db.get_paper(test_arxiv_id)
	        if paper:
	            print(f"✅ Paper found in database: {paper['title']}")
	        else:
	            print("⚠️ Paper not in database, creating new record")
	            # Insert test paper
	            await db.insert_paper(
	                arxiv_id=test_arxiv_id,
	                title="Test Paper for Evaluation",
	                authors="Test Author",
	                abstract="This is a test paper for evaluation.",
	                categories="cs.AI",
	                published_date="2024-08-01",
	            )
	            print("✅ Test paper inserted into database")

	        print("\n=== Starting Evaluation ===")

	        # Run evaluation
	        result = await run_evaluation(
	            pdf_path=test_pdf_url,
	            arxiv_id=test_arxiv_id,
	            api_key=api_key,
	        )

	        print("\n=== Evaluation Results ===")
	        print(f"Result length: {len(result)} characters")
	        print(f"First 500 characters: {result[:500]}...")

	        # Check if result contains expected content (a miss is only a warning)
	        if "AI Automation Assessment" in result or "Executive Summary" in result:
	            print("✅ Evaluation result contains expected content")
	        else:
	            print("⚠️ Evaluation result may be incomplete")

	        # Check evaluation status in database
	        updated_paper = await db.get_paper(test_arxiv_id)
	        if not (updated_paper and updated_paper.get('is_evaluated')):
	            print("❌ Evaluation not saved to database")
	            # A run whose result was not persisted must not be reported as a pass.
	            return False

	        print("✅ Evaluation saved to database")
	        print(f"Evaluation score: {updated_paper.get('evaluation_score')}")
	        print(f"Evaluation tags: {updated_paper.get('evaluation_tags')}")
	        return True

	    except Exception as e:
	        # Top-level boundary of the test: report the failure instead of crashing the script.
	        print(f"❌ Error during evaluation: {e}")
	        import traceback
	        traceback.print_exc()
	        return False


	async def test_database_operations():
	    """Smoke-test the database layer: read one paper, then the paper counts.

	    A missing test paper is reported as a warning only; it does not fail the test.

	    Returns:
	        bool: ``True`` if both queries completed, ``False`` if either raised.
	    """
	    print("\n=== Testing Database Operations ===")

	    try:
	        # Look up the fixed test paper
	        record = await db.get_paper("2508.09889")
	        if not record:
	            print("⚠️ Test paper not found in database")
	        else:
	            print(f"✅ Database connection OK, found paper: {record['title']}")

	        # Fetch the aggregate counts and report them
	        counts = await db.get_papers_count()
	        total = counts['total']
	        evaluated = counts['evaluated']
	        unevaluated = counts['unevaluated']
	        print(f"✅ Paper statistics: Total={total}, Evaluated={evaluated}, Unevaluated={unevaluated}")
	    except Exception as err:
	        print(f"❌ Database operation error: {str(err)}")
	        return False
	    else:
	        return True


	async def main():
	    """Initialize config, logger and database, then run both test routines.

	    The command-line arguments select the config file; the database test runs
	    first, followed by the evaluation test. A per-test summary and an overall
	    verdict are printed. Returns ``None``.
	    """
	    print("🚀 Starting Evaluation System Test")

	    # Parse command line arguments
	    args = parse_args()

	    # Initialize configuration
	    config.init_config(args.config, args)

	    # Initialize logger
	    logger.init_logger(config=config)
	    # The log prefix is a plain "|". A backslash before the pipe is not a valid
	    # string escape: it triggers a SyntaxWarning on Python 3.12+ and leaves a
	    # literal backslash in the logged message.
	    logger.info(f"| Logger initialized at: {config.log_path}")
	    logger.info(f"| Config:\n{config.pretty_text}")

	    # Initialize database
	    await db.init_db(config=config)
	    logger.info(f"| Database initialized at: {config.db_path}")

	    print(f"✅ Database initialized: {config.db_path}")

	    # Test database operations
	    db_success = await test_database_operations()

	    # Test evaluation functionality
	    eval_success = await test_evaluation()

	    print("\n=== Test Summary ===")
	    print(f"Database operations: {'✅ Success' if db_success else '❌ Failed'}")
	    print(f"Evaluation functionality: {'✅ Success' if eval_success else '❌ Failed'}")

	    if db_success and eval_success:
	        print("🎉 All tests passed!")
	    else:
	        print("⚠️ Some tests failed, please check error messages")


	if __name__ == "__main__":
	    # Executed as a script: create the entry-point coroutine and run it to completion.
	    entry_coroutine = main()
	    asyncio.run(entry_coroutine)