# File size: 2,957 Bytes | 15b4e3f
import re
from collections import Counter
from typing import Any, Dict

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

from utils.logging_config import get_logger
# Module-level logger, obtained from the project's logging helper and keyed by this module's name.
logger = get_logger(__name__)
def compute_similarity(resume_emb: Any, job_emb: Any) -> float:
    """Return the cosine similarity between a resume and a job embedding.

    Each embedding is wrapped in a one-element list and handed to
    ``cosine_similarity``; the single entry of the resulting matrix is
    converted to a plain ``float``.

    Args:
        resume_emb: Embedding of the resume, or ``None``.
        job_emb: Embedding of the job description, or ``None``.

    Returns:
        The similarity score. ``0.0`` is returned (after logging) when either
        embedding is ``None`` or when the computation raises.
    """
    try:
        # Guard first: a missing embedding is reported as a warning, not an error.
        if any(emb is None for emb in (resume_emb, job_emb)):
            logger.warning("One or both embeddings are None")
            return 0.0

        similarity_matrix = cosine_similarity([resume_emb], [job_emb])
        similarity = float(similarity_matrix[0][0])
        logger.debug("Computed cosine similarity: %s", similarity)
        return similarity
    except Exception as error:
        # Best-effort contract: never propagate, log with traceback instead.
        logger.exception("Failed to compute similarity: %s", error)
        return 0.0
def _collect_skills(skills: Dict[str, Any]) -> set:
    """Return the union of the ``dict_skills`` and ``fuzzy_skills`` entries as a set."""
    skills = skills or {}
    # ``or ()`` tolerates keys that are present but explicitly set to None.
    return set(skills.get("dict_skills") or ()) | set(skills.get("fuzzy_skills") or ())


def _count_mentions(skill: str, text_lower: str) -> int:
    """Count whole-phrase occurrences of *skill* in already-lowercased text."""
    words = skill.lower().split()
    if not words:
        return 0
    # Allow any run of whitespace between the words of a multi-word skill.
    phrase = r"\s+".join(re.escape(word) for word in words)
    # Lookarounds instead of ``\b`` so skills such as "c++", "c#" or ".net"
    # work; the trailing class stops "c" from matching inside "c++" / "c#".
    pattern = r"(?<!\w)" + phrase + r"(?![\w+#])"
    return len(re.findall(pattern, text_lower))


def compute_skill_match(resume_skills: Dict[str, Any], job_skills: Dict[str, Any], job_text: str, top_n: int = 20) -> Dict[str, Any]:
    """Compare resume skills against job skills and rank what is missing.

    Skills are read from the ``dict_skills`` and ``fuzzy_skills`` entries of
    each mapping. Matching between the two skill sets is exact (case-sensitive).

    Args:
        resume_skills: Mapping holding the skills extracted from the resume.
        job_skills: Mapping holding the skills extracted from the job posting.
        job_text: Raw job description, used only to rank the missing skills;
            ``None`` or empty text ranks every missing skill equally.
        top_n: Maximum number of missing skills to return.

    Returns:
        A dict with three keys:
        ``skill_score`` - fraction of job skills present in the resume,
        rounded to two decimals (``0.0`` when the job lists no skills);
        ``overlap`` - alphabetically sorted skills found in both;
        ``missing`` - job skills absent from the resume, most frequently
        mentioned in ``job_text`` first, ties broken alphabetically, truncated
        to ``top_n``.
        On any unexpected error the failure is logged and a zeroed result
        (``0.0`` and two empty lists) is returned instead of raising.
    """
    try:
        resume_set = _collect_skills(resume_skills)
        job_set = _collect_skills(job_skills)

        overlap = resume_set & job_set
        missing = job_set - resume_set
        skill_score = len(overlap) / len(job_set) if job_set else 0.0

        # Count phrase occurrences in the full text rather than comparing
        # against whitespace-split tokens, so multi-word skills and mentions
        # followed by punctuation ("python,") are counted.
        text_lower = (job_text or "").lower()
        mentions = {skill: _count_mentions(skill, text_lower) for skill in missing}

        # The alphabetical tie-breaker keeps the output deterministic; plain
        # set iteration order changes between runs under hash randomization.
        ranked_missing = sorted(
            missing,
            key=lambda skill: (-mentions[skill], skill),
        )[:top_n]

        result = {
            "skill_score": round(skill_score, 2),
            "overlap": sorted(overlap),
            "missing": ranked_missing,
        }
        logger.debug("Computed skill match with ranking: %s", result)
        return result
    except Exception as exc:
        # Best-effort contract: callers always receive a well-formed result.
        logger.exception("Failed to compute skill match: %s", exc)
        return {"skill_score": 0.0, "overlap": [], "missing": []}
def interpret_similarity(score: float) -> str:
    """Translate a similarity score into a user-facing recommendation.

    Thresholds are inclusive lower bounds: ``0.8`` and above is an excellent
    match, ``0.65`` a good match, ``0.5`` a partial match, and anything lower
    a weak match.

    Args:
        score: Similarity score to interpret.

    Returns:
        The recommendation message for the band the score falls into, or
        ``"Score interpretation unavailable."`` when the score cannot be
        compared (for example ``None``); that failure is logged.
    """
    try:
        if score >= 0.8:
            return "✅ Excellent match! You should definitely apply for this job."
        if score >= 0.65:
            return "👍 Good match. You stand a strong chance — applying is recommended."
        if score >= 0.5:
            return "⚠️ Partial match. Consider improving your resume by adding missing relevant skills."
        return "❌ Weak match. Your resume and the job description differ significantly. Tailoring your resume is highly recommended."
    except Exception as exc:
        # Non-comparable scores must not crash the caller; fall back to a neutral message.
        logger.exception("Failed to interpret similarity score: %s", exc)
        return "Score interpretation unavailable."
|