#!/usr/bin/env python3
"""
MCP File Processor Tool - Gradio Implementation
This MCP server provides file processing capabilities including:
- Text extraction and statistics for plain-text files
- CSV data analysis (pandas-based when available)
- JSON structure inspection
- Markdown document structure analysis
Supports MCP protocol via Gradio interface.
"""
import csv
import json
import logging
import os
from io import StringIO
import gradio as gr
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
try:
import pandas as pd
PANDAS_AVAILABLE = True
except ImportError:
PANDAS_AVAILABLE = False
    logger.warning("pandas not available; falling back to basic CSV analysis")
class FileProcessor:
"""File processing service with text extraction and analysis."""
def __init__(self):
"""Initialize the file processor."""
self.supported_formats = {
"txt": self._process_text_file,
"csv": self._process_csv_file,
"json": self._process_json_file,
"md": self._process_markdown_file,
}
    def process_file(self, file_path: str, file_content: bytes | None = None) -> str:
"""
Process uploaded file and extract information.
Args:
file_path: Path to the uploaded file
file_content: Raw file content bytes
Returns:
String with processed file information
"""
try:
if not file_path and not file_content:
return "Error: No file provided"
# Determine file extension
if file_path:
file_ext = os.path.splitext(file_path)[1].lower().lstrip(".")
file_name = os.path.basename(file_path)
else:
file_ext = "unknown"
file_name = "uploaded_file"
# Read file content if not provided
if file_content is None and file_path:
try:
with open(file_path, "rb") as f:
file_content = f.read()
except Exception as e:
return f"Error reading file: {e!s}"
if not file_content:
return "Error: Empty file or could not read content"
# Get file size
file_size = len(file_content)
# Process based on file type
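            # Extensions without a registered handler fall back to
            # _process_unknown_file, which applies a simple text-vs-binary heuristic.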
if file_ext in self.supported_formats:
content_analysis = self.supported_formats[file_ext](file_content)
else:
content_analysis = self._process_unknown_file(file_content)
# Build result
result = []
result.append("πŸ“ FILE PROCESSING RESULTS")
result.append("=" * 40)
result.append(f"File: {file_name}")
result.append(f"Type: {file_ext.upper() if file_ext else 'Unknown'}")
result.append(f"Size: {file_size:,} bytes ({self._format_size(file_size)})")
result.append("")
result.append("πŸ“„ CONTENT ANALYSIS:")
result.append("-" * 25)
result.append(content_analysis)
logger.info(f"Successfully processed file: {file_name} ({file_size} bytes)")
return "\n".join(result)
except Exception as e:
error_msg = f"Error processing file: {e!s}"
logger.error(error_msg)
return error_msg
def _process_text_file(self, content: bytes) -> str:
"""Process plain text file."""
try:
text = content.decode("utf-8", errors="ignore")
lines = text.split("\n")
analysis = []
analysis.append(f"Lines: {len(lines)}")
analysis.append(f"Characters: {len(text)}")
analysis.append(f"Words: {len(text.split())}")
analysis.append(f"Paragraphs: {len([line for line in lines if line.strip()])}")
analysis.append("")
analysis.append("πŸ“– CONTENT PREVIEW:")
analysis.append(text[:500] + ("..." if len(text) > 500 else ""))
return "\n".join(analysis)
except Exception as e:
return f"Error processing text file: {e!s}"
def _process_csv_file(self, content: bytes) -> str:
"""Process CSV file."""
try:
text = content.decode("utf-8", errors="ignore")
if PANDAS_AVAILABLE:
return self._process_csv_with_pandas(text)
return self._process_csv_basic(text)
except Exception as e:
return f"Error processing CSV file: {e!s}"
def _process_csv_with_pandas(self, text: str) -> str:
"""Process CSV using pandas for advanced analysis."""
try:
df = pd.read_csv(StringIO(text))
analysis = []
analysis.append(f"Rows: {len(df)}")
analysis.append(f"Columns: {len(df.columns)}")
analysis.append("")
analysis.append("πŸ“Š COLUMN INFORMATION:")
for col in df.columns:
dtype = str(df[col].dtype)
null_count = df[col].isnull().sum()
analysis.append(f" β€’ {col}: {dtype} ({null_count} nulls)")
analysis.append("")
analysis.append("πŸ“ˆ DATA PREVIEW:")
analysis.append(df.head().to_string())
# Basic statistics for numeric columns
numeric_cols = df.select_dtypes(include=["number"]).columns
if len(numeric_cols) > 0:
analysis.append("")
analysis.append("πŸ“Š NUMERIC STATISTICS:")
analysis.append(df[numeric_cols].describe().to_string())
return "\n".join(analysis)
except Exception as e:
return f"Error in pandas CSV processing: {e!s}"
def _process_csv_basic(self, text: str) -> str:
"""Process CSV using basic Python CSV module."""
try:
reader = csv.reader(StringIO(text))
rows = list(reader)
if not rows:
return "Empty CSV file"
headers = rows[0] if rows else []
data_rows = rows[1:] if len(rows) > 1 else []
analysis = []
analysis.append(f"Rows: {len(data_rows)} (+ 1 header)")
analysis.append(f"Columns: {len(headers)}")
analysis.append("")
analysis.append("πŸ“‹ COLUMNS:")
for i, header in enumerate(headers):
analysis.append(f" {i+1}. {header}")
analysis.append("")
analysis.append("πŸ“„ SAMPLE DATA:")
for i, row in enumerate(data_rows[:5]):
analysis.append(f"Row {i+1}: {row}")
if len(data_rows) > 5:
analysis.append(f"... and {len(data_rows) - 5} more rows")
return "\n".join(analysis)
except Exception as e:
return f"Error in basic CSV processing: {e!s}"
def _process_json_file(self, content: bytes) -> str:
"""Process JSON file."""
try:
text = content.decode("utf-8", errors="ignore")
data = json.loads(text)
analysis = []
analysis.append(f"Type: {type(data).__name__}")
if isinstance(data, dict):
analysis.append(f"Keys: {len(data.keys())}")
analysis.append("")
analysis.append("πŸ—οΈ TOP-LEVEL KEYS:")
for key in list(data.keys())[:10]:
value_type = type(data[key]).__name__
analysis.append(f" β€’ {key}: {value_type}")
if len(data.keys()) > 10:
analysis.append(f" ... and {len(data.keys()) - 10} more keys")
elif isinstance(data, list):
analysis.append(f"Items: {len(data)}")
if data:
first_item_type = type(data[0]).__name__
analysis.append(f"First item type: {first_item_type}")
analysis.append("")
analysis.append("πŸ“„ CONTENT PREVIEW:")
            full_dump = json.dumps(data, indent=2)
            analysis.append(full_dump[:800] + ("..." if len(full_dump) > 800 else ""))
return "\n".join(analysis)
except json.JSONDecodeError as e:
return f"Invalid JSON format: {e!s}"
except Exception as e:
return f"Error processing JSON: {e!s}"
def _process_markdown_file(self, content: bytes) -> str:
"""Process Markdown file."""
try:
text = content.decode("utf-8", errors="ignore")
lines = text.split("\n")
# Count markdown elements
headers = [line for line in lines if line.strip().startswith("#")]
            links = text.count("](")  # counts Markdown link/image targets
code_blocks = text.count("```")
analysis = []
analysis.append(f"Lines: {len(lines)}")
analysis.append(f"Characters: {len(text)}")
analysis.append(f"Headers: {len(headers)}")
analysis.append(f"Links: {links}")
analysis.append(f"Code blocks: {code_blocks // 2}") # Divide by 2 (start + end)
analysis.append("")
if headers:
analysis.append("πŸ“‘ DOCUMENT STRUCTURE:")
for header in headers[:10]:
level = len(header) - len(header.lstrip("#"))
title = header.lstrip("# ").strip()
indent = " " * (level - 1)
analysis.append(f"{indent}β€’ {title}")
if len(headers) > 10:
analysis.append(f" ... and {len(headers) - 10} more headers")
analysis.append("")
analysis.append("πŸ“– CONTENT PREVIEW:")
analysis.append(text[:500] + ("..." if len(text) > 500 else ""))
return "\n".join(analysis)
except Exception as e:
return f"Error processing Markdown: {e!s}"
def _process_unknown_file(self, content: bytes) -> str:
"""Process unknown file type."""
try:
            # Try to decode as text. decode() with errors="ignore" never raises,
            # so use a NUL-byte check as a simple text-vs-binary heuristic.
            text = content.decode("utf-8", errors="ignore")
            is_text = b"\x00" not in content
analysis = []
if is_text and len(text.strip()) > 0:
analysis.append("Type: Text-based file")
analysis.append(f"Lines: {len(text.split())}")
analysis.append(f"Characters: {len(text)}")
analysis.append("")
analysis.append("πŸ“„ CONTENT PREVIEW:")
analysis.append(text[:300] + ("..." if len(text) > 300 else ""))
else:
analysis.append("Type: Binary file")
analysis.append("Content: Binary data (not text-readable)")
analysis.append(f"First 32 bytes: {content[:32].hex()}")
return "\n".join(analysis)
except Exception as e:
return f"Error processing unknown file: {e!s}"
def _format_size(self, size_bytes: int) -> str:
"""Format file size in human-readable format."""
for unit in ["B", "KB", "MB", "GB"]:
if size_bytes < 1024:
return f"{size_bytes:.1f} {unit}"
size_bytes /= 1024
return f"{size_bytes:.1f} TB"
# Initialize the file processor
file_processor = FileProcessor()
def process_file_mcp(file) -> str:
"""
MCP-compatible file processing function.
Args:
file: Gradio File object
Returns:
String with file analysis results
"""
try:
if file is None:
return "Error: No file uploaded"
# Handle Gradio file input
        # Newer Gradio versions pass a filepath string; older ones pass an object with .name
        file_path = file if isinstance(file, str) else getattr(file, "name", None)
result = file_processor.process_file(file_path)
return result
except Exception as e:
error_msg = f"Error processing file: {e!s}"
logger.error(error_msg)
return error_msg
def create_gradio_interface():
"""Create and configure the Gradio interface."""
interface = gr.Interface(
fn=process_file_mcp,
inputs=[
gr.File(
label="Upload File",
file_types=[".txt", ".csv", ".json", ".md", ".py", ".js", ".html", ".xml"]
)
],
outputs=[
gr.Textbox(
label="File Analysis Results",
lines=20,
show_copy_button=True
)
],
title="πŸ“ MCP File Processor",
description="""
**File Processing MCP Server**
Upload and analyze various file types:
- **Text files**: Word count, content preview
- **CSV files**: Data analysis, column info, statistics
- **JSON files**: Structure analysis, key exploration
- **Markdown**: Document structure, headers, links
Supports: TXT, CSV, JSON, MD, PY, JS, HTML, XML files
""",
examples=[],
allow_flagging="never",
analytics_enabled=False
)
return interface
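# Note: exposing these functions over the MCP protocol is assumed to rely on Gradio's
# built-in MCP support (recent Gradio 5.x with the gradio[mcp] extra accepts
# mcp_server=True in launch()); that flag is not enabled in the launch() call below.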
def main():
"""Main function to run the Gradio app."""
port = int(os.getenv("GRADIO_SERVER_PORT", 7860))
host = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
logger.info(f"Starting MCP File Processor on {host}:{port}")
interface = create_gradio_interface()
interface.launch(
server_name=host,
server_port=port,
share=False,
debug=False,
quiet=False,
show_error=True
)
if __name__ == "__main__":
main()