Spaces:

ak0601
/

Friends_forever

Sleeping

App Files Files Community

Friends_forever / src /extract_features.py

ak0601

Upload 6 files

e386167 verified 14 days ago

raw

history blame

2.26 kB

	import re
	import random
	from collections import Counter, defaultdict

	def parse_chat(file_path):
	    """Parse a text chat export into a list of message dicts.

	    A message starts on a line shaped like
	    ``D/M/YY, H:MM - Sender: text`` (the layout looks like a WhatsApp
	    text export). Lines that do not start with such a timestamp are
	    treated as continuation lines of the message above them and are
	    appended to its text, separated by a newline. Timestamped lines
	    without a ``Sender:`` part, and their continuation lines, are skipped.

	    The sender names "ak" and "Sarah con H" are normalized to "Aman" and
	    "Sarah"; every other sender is kept as written.

	    Args:
	        file_path: Path of the UTF-8 encoded export file. A leading
	            byte-order mark is tolerated.

	    Returns:
	        A list of dicts with the keys "date", "time", "sender" and "text",
	        in file order.
	    """
	    # Compiled once up front instead of being re-resolved for every line.
	    message_re = re.compile(
	        r"(\d{1,2}/\d{1,2}/\d{2,4}), (\d{1,2}:\d{2}) - ([^:]+): (.*)"
	    )
	    # Same timestamp prefix without the "Sender: text" part; used to tell a
	    # sender-less timestamped line apart from a continuation line.
	    timestamp_re = re.compile(r"\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} - ")
	    aliases = {"ak": "Aman", "Sarah con H": "Sarah"}

	    messages = []
	    current = None  # message that continuation lines currently belong to

	    # "utf-8-sig" strips a leading BOM; with plain "utf-8" the BOM stays on
	    # the first line and prevents the pattern from matching it.
	    with open(file_path, "r", encoding="utf-8-sig") as f:
	        for line in f:
	            match = message_re.match(line)
	            if match:
	                date, time, sender, text = match.groups()
	                current = {
	                    "date": date,
	                    "time": time,
	                    "sender": aliases.get(sender, sender),
	                    "text": text.strip(),
	                }
	                messages.append(current)
	            elif timestamp_re.match(line):
	                # Timestamped line with no sender: not a chat message, and
	                # whatever follows it must not be glued to the previous one.
	                current = None
	            elif current is not None:
	                extra = line.strip()
	                if extra:
	                    if current["text"]:
	                        current["text"] = current["text"] + "\n" + extra
	                    else:
	                        current["text"] = extra
	    return messages


	def extract_inside_jokes(messages):
	    """Sort message texts into themed buckets and rank the words used.

	    Each item of ``messages`` is a dict with a "text" entry. Keyword checks
	    are case-insensitive substring searches on the lowercased text; the
	    texts stored in the buckets keep their original casing.

	    Returns:
	        A dict with four entries:
	        "funny"     - texts containing any of the funny keywords,
	        "cute"      - texts containing any of the cute keywords,
	        "memories"  - texts with more than four whitespace-separated words,
	        "top_words" - up to 40 most frequent lowercased words, most
	                      frequent first.
	    """
	    funny_markers = ("lol", "😂", "🤣", "lmao", "funny", "haha", "hehe", "hahaha")
	    cute_markers = ("miss", "thank", "sweet", "cute", "proud", "happy", "aww", "glad")

	    def mentions(haystack, markers):
	        # True when at least one marker occurs anywhere inside haystack.
	        return any(marker in haystack for marker in markers)

	    buckets = {"funny": [], "cute": [], "memories": []}
	    word_tally = Counter()

	    for entry in messages:
	        original = entry["text"]
	        lowered = original.lower()

	        if mentions(lowered, funny_markers):
	            buckets["funny"].append(original)

	        if mentions(lowered, cute_markers):
	            buckets["cute"].append(original)

	        # Very short messages are not kept as memories.
	        if len(original.split()) > 4:
	            buckets["memories"].append(original)

	        # Word frequencies are gathered over every message, matched or not.
	        word_tally.update(lowered.split())

	    buckets["top_words"] = [word for word, _count in word_tally.most_common(40)]
	    return buckets


	def random_memory(messages):
	    """Pick the text of one randomly chosen message.

	    Only texts longer than 10 characters are eligible. When no message
	    qualifies, a fixed placeholder sentence is returned instead.
	    """
	    eligible = []
	    for entry in messages:
	        body = entry["text"]
	        if len(body) > 10:
	            eligible.append(body)

	    if eligible:
	        return random.choice(eligible)
	    return "One of your old conversations ❤️"