Spaces:

MCP-1st-Birthday
/

cx_ai_agent

Runtime error

App Files Files Community

muzakkirhussain011 committed 24 days ago

Commit

52cf520

1 Parent(s): 819d651

Add application files

Browse files

Files changed (3) hide show

app.py +17 -14
services/__pycache__/enhanced_contact_finder.cpython-310.pyc +0 -0
services/enhanced_contact_finder.py +225 -351

app.py CHANGED Viewed

@@ -590,19 +590,21 @@ def get_contacts_html() -> str:
         """
     html = """
-    <div style="background: var(--info-bg, #d1ecf1); border: 1px solid var(--info-border, #bee5eb); border-radius: 8px; padding: 12px 16px; margin-bottom: 16px;">
-        <div style="font-size: 13px; color: var(--info-text, #0c5460);">
-            <strong>ℹ️ Contact Sources:</strong> Names are sourced from LinkedIn searches and company team pages.
-            Email addresses use common business formats. Contacts marked <strong>VERIFIED</strong> were found in public web sources.
         </div>
     </div>
     """
     for c in reversed(knowledge_base["contacts"]):
-        is_verified = c.get("verified", False)
-        source = c.get("source", "unknown")
-        verified_badge = "VERIFIED" if is_verified else "ESTIMATED"
-        badge_class = "badge-engaged" if is_verified else "badge-researched"
-        source_text = "from web search" if is_verified else "estimated"
         html += f"""
         <div class="prospect-card" style="padding: 16px 20px;">
             <div style="display: flex; justify-content: space-between; align-items: center;">
@@ -610,10 +612,11 @@ def get_contacts_html() -> str:
                     <div style="font-weight: 600; color: var(--text-primary);">👤 {c.get("name", "Unknown")}</div>
                     <div style="font-size: 13px; color: var(--text-secondary); margin-top: 4px;">{c.get("title", "Unknown title")}</div>
                     <div style="font-size: 13px; color: var(--text-secondary);">🏢 {c.get("company", "Unknown company")}</div>
-                    {f'<div style="font-size: 13px; color: var(--primary-blue); margin-top: 4px;">📧 {c.get("email")} <span style="font-size: 11px; color: var(--text-secondary);">({source_text})</span></div>' if c.get("email") else ''}
                 </div>
-                <span class="prospect-card-badge {badge_class}">{verified_badge}</span>
             </div>
         </div>
         """
     return html
@@ -1138,12 +1141,12 @@ After processing {num_prospects} prospects, provide summary:
                             output += f"  - {p.get('summary')[:150]}...\n" if len(p.get('summary', '')) > 150 else f"  - {p.get('summary')}\n"
                     if contacts_found:
-                        output += "\n### 👥 Decision Makers Found\n\n"
-                        output += "> ⚠️ **Note:** Names are from web searches. Emails are *estimated* ([email protected] format). Please verify before outreach.\n\n"
                         for c in contacts_found:
                             output += f"- **{c.get('name', 'Unknown')}** - {c.get('title', 'Unknown')} at {c.get('company', 'Unknown')}\n"
                             if c.get('email'):
-                                output += f"  - Email: {c.get('email')} *(estimated)*\n"
                     if emails_drafted:
                         output += "\n### ✉️ Emails Drafted\n\n"

         """
     html = """
+    <div style="background: var(--success-bg, #d4edda); border: 1px solid var(--success-border, #c3e6cb); border-radius: 8px; padding: 12px 16px; margin-bottom: 16px;">
+        <div style="font-size: 13px; color: var(--success-text, #155724);">
+            <strong>✅ Verified Contacts:</strong> All contacts shown here were found through web searches of LinkedIn profiles,
+            company team pages, and public directories. Only contacts with <strong>verified email addresses</strong> found on the web are displayed.
         </div>
     </div>
     """
     for c in reversed(knowledge_base["contacts"]):
+        source = c.get("source", "web_search")
+        source_label = {
+            "web_search": "Found via web search",
+            "linkedin": "Found via LinkedIn",
+            "team_page": "Found on company page",
+            "web_search_and_scraping": "Verified from web"
+        }.get(source, "Verified")
         html += f"""
         <div class="prospect-card" style="padding: 16px 20px;">
             <div style="display: flex; justify-content: space-between; align-items: center;">
                     <div style="font-weight: 600; color: var(--text-primary);">👤 {c.get("name", "Unknown")}</div>
                     <div style="font-size: 13px; color: var(--text-secondary); margin-top: 4px;">{c.get("title", "Unknown title")}</div>
                     <div style="font-size: 13px; color: var(--text-secondary);">🏢 {c.get("company", "Unknown company")}</div>
+                    {f'<div style="font-size: 13px; color: var(--primary-blue); margin-top: 4px;">📧 {c.get("email")}</div>' if c.get("email") else ''}
                 </div>
+                <span class="prospect-card-badge badge-engaged">VERIFIED</span>
             </div>
+            <div style="font-size: 11px; color: var(--text-secondary); margin-top: 8px;">{source_label}</div>
         </div>
         """
     return html
                             output += f"  - {p.get('summary')[:150]}...\n" if len(p.get('summary', '')) > 150 else f"  - {p.get('summary')}\n"
                     if contacts_found:
+                        output += "\n### 👥 Verified Decision Makers Found\n\n"
+                        output += "> ✅ **Verified:** These contacts were found through web searches of LinkedIn, company pages, and public sources.\n\n"
                         for c in contacts_found:
                             output += f"- **{c.get('name', 'Unknown')}** - {c.get('title', 'Unknown')} at {c.get('company', 'Unknown')}\n"
                             if c.get('email'):
+                                output += f"  - Email: {c.get('email')} *(verified)*\n"
                     if emails_drafted:
                         output += "\n### ✉️ Emails Drafted\n\n"

services/__pycache__/enhanced_contact_finder.cpython-310.pyc CHANGED Viewed

Binary files a/services/__pycache__/enhanced_contact_finder.cpython-310.pyc and b/services/__pycache__/enhanced_contact_finder.cpython-310.pyc differ

services/enhanced_contact_finder.py CHANGED Viewed

@@ -72,15 +72,11 @@ class EnhancedContactFinder:
             r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s*\n\s*([A-Z][^,\n]+)',
         ]
-        # Common email formats to try
-        self.email_formats = [
             '{first}.{last}',      # first.last@domain
             '{first}{last}',       # firstlast@domain
-            '{first}_{last}',      # first_last@domain
-            '{first}-{last}',      # first-last@domain
-            '{first}',             # first@domain
-            '{f}{last}',           # flast@domain (first initial + last name)
-            '{first}{l}',          # firstl@domain (first name + last initial)
         ]
     async def find_real_contacts(
@@ -91,123 +87,74 @@ class EnhancedContactFinder:
         max_contacts: int = 3
     ) -> List[Contact]:
         """
-        Find real decision-makers using multiple strategies
         Returns:
-            List of Contact objects with real names and emails
         """
-        logger.info(f"EnhancedFinder: Finding real contacts at '{company_name}'")
         print(f"\n[CONTACT FINDER] Starting search for {company_name}")
         print(f"[CONTACT FINDER] Domain: {domain}")
         print(f"[CONTACT FINDER] Target titles: {target_titles}")
         print(f"[CONTACT FINDER] Max contacts: {max_contacts}")
         contacts = []
         seen_emails: Set[str] = set()
-        # Strategy 1: Search LinkedIn profiles for each title
-        print(f"[CONTACT FINDER] Strategy 1: Searching LinkedIn profiles...")
-        for title in target_titles[:max_contacts * 2]:  # Search more than needed
-            print(f"[CONTACT FINDER]   Searching for: {title}")
-            linkedin_contact = await self._find_linkedin_contact(
-                company_name,
-                title,
-                domain,
-                seen_emails
-            )
-            if linkedin_contact:
-                contacts.append(linkedin_contact)
-                seen_emails.add(linkedin_contact.email.lower())
-                logger.info(f"EnhancedFinder: Found {title} via LinkedIn search")
-                print(f"[CONTACT FINDER]   ✓ FOUND: {linkedin_contact.name} ({linkedin_contact.title}) - {linkedin_contact.email}")
-                if len(contacts) >= max_contacts:
-                    print(f"[CONTACT FINDER] Found enough contacts ({len(contacts)}), returning early")
-                    return contacts
-            else:
-                print(f"[CONTACT FINDER]   ✗ Not found via LinkedIn")
-        # Strategy 2: Scrape company team pages
         if len(contacts) < max_contacts:
-            team_contacts = await self._scrape_team_pages(
                 company_name,
                 domain,
                 target_titles,
                 seen_emails,
                 max_contacts - len(contacts)
             )
-            contacts.extend(team_contacts)
-            logger.info(f"EnhancedFinder: Found {len(team_contacts)} contacts from team pages")
-        # Strategy 3: General web search for contact info
         if len(contacts) < max_contacts:
-            web_contacts = await self._search_company_contacts(
                 company_name,
                 domain,
                 target_titles,
                 seen_emails,
                 max_contacts - len(contacts)
             )
-            contacts.extend(web_contacts)
-            logger.info(f"EnhancedFinder: Found {len(web_contacts)} contacts from web search")
-        logger.info(f"EnhancedFinder: Total {len(contacts)} real contacts found for '{company_name}'")
         print(f"[CONTACT FINDER] === FINAL RESULT ===")
-        print(f"[CONTACT FINDER] Total contacts found: {len(contacts)}")
         for i, contact in enumerate(contacts[:max_contacts], 1):
-            print(f"[CONTACT FINDER]   {i}. {contact.name} ({contact.title}) - {contact.email}")
         print(f"[CONTACT FINDER] ====================\n")
         return contacts[:max_contacts]
-    async def _find_linkedin_contact(
-        self,
-        company_name: str,
-        title: str,
-        domain: str,
-        seen_emails: Set[str]
-    ) -> Optional[Contact]:
-        """Search LinkedIn specifically for decision-makers"""
-        # LinkedIn-specific search queries
-        queries = [
-            f'site:linkedin.com/in {title} at {company_name}',
-            f'linkedin {company_name} {title}',
-            f'"{title}" "{company_name}" linkedin.com',
-        ]
-        for query in queries:
-            try:
-                print(f"[CONTACT FINDER]     Query: '{query}'")
-                results = await self.search.search(query, max_results=5)
-                print(f"[CONTACT FINDER]     Results: {len(results)} found")
-                for result in results:
-                    # Extract name and title from LinkedIn result
-                    contact_info = self._extract_linkedin_info(result, title, company_name)
-                    if contact_info and contact_info.get('name'):
-                        name = contact_info['name']
-                        detected_title = contact_info.get('title', title)
-                        # Generate email from name
-                        email = await self._generate_verified_email(name, domain, seen_emails)
-                        if email:
-                            return Contact(
-                                id=str(uuid.uuid4()),
-                                name=name,
-                                email=email,
-                                title=detected_title,
-                                prospect_id=""
-                            )
-            except Exception as e:
-                logger.debug(f"EnhancedFinder: LinkedIn search error for '{query}': {str(e)}")
-                continue
-        return None
-    async def _scrape_team_pages(
         self,
         company_name: str,
         domain: str,
@@ -215,148 +162,54 @@ class EnhancedContactFinder:
         seen_emails: Set[str],
         max_needed: int
     ) -> List[Contact]:
-        """Scrape company team/about pages for contact information"""
         contacts = []
-        # Try to find team page URLs
-        team_page_queries = [
-            f'site:{domain} team',
-            f'site:{domain} leadership',
-            f'site:{domain} about us',
-            f'{company_name} team page',
         ]
-        team_page_urls = set()
-        for query in team_page_queries:
             try:
-                results = await self.search.search(query, max_results=3)
-                for result in results:
-                    url = result.get('url', '')
-                    if domain in url and any(pattern in url.lower() for pattern in self.team_page_patterns):
-                        team_page_urls.add(url)
-            except Exception as e:
-                logger.debug(f"EnhancedFinder: Team page search error: {str(e)}")
-                continue
-        # Also try common team page URLs directly
-        for pattern in self.team_page_patterns[:3]:  # Try top 3 patterns
-            team_page_urls.add(f"https://{domain}{pattern}")
-            team_page_urls.add(f"https://www.{domain}{pattern}")
-        # Scrape each team page
-        for url in list(team_page_urls)[:5]:  # Limit to 5 pages
-            try:
-                page_contacts = await self._extract_contacts_from_page(
-                    url,
-                    company_name,
-                    domain,
-                    target_titles,
-                    seen_emails
-                )
-                for contact in page_contacts:
-                    if contact.email.lower() not in seen_emails:
-                        contacts.append(contact)
-                        seen_emails.add(contact.email.lower())
-                        if len(contacts) >= max_needed:
-                            return contacts
             except Exception as e:
-                logger.debug(f"EnhancedFinder: Error scraping {url}: {str(e)}")
                 continue
         return contacts
-    async def _extract_contacts_from_page(
-        self,
-        url: str,
-        company_name: str,
-        domain: str,
-        target_titles: List[str],
-        seen_emails: Set[str]
-    ) -> List[Contact]:
-        """Extract contact information from a webpage"""
-        contacts = []
-        try:
-            # Scrape the page
-            page_content = await self.scraper.scrape_page(url)
-            if not page_content:
-                return contacts
-            # Extract all text
-            text = page_content.get('text', '')
-            # Find all potential contacts using regex patterns
-            potential_contacts = []
-            for pattern in self.name_patterns:
-                matches = re.finditer(pattern, text, re.MULTILINE)
-                for match in matches:
-                    name = match.group(1).strip()
-                    title = match.group(2).strip() if len(match.groups()) > 1 else ""
-                    # Validate name
-                    if self._is_valid_name(name):
-                        potential_contacts.append({
-                            'name': name,
-                            'title': title
-                        })
-            # Also look for email addresses directly on the page
-            email_pattern = r'\b[A-Za-z0-9._%+-]+@' + re.escape(domain) + r'\b'
-            found_emails = re.findall(email_pattern, text, re.IGNORECASE)
-            # Match contacts with titles we're looking for
-            for pc in potential_contacts:
-                name = pc['name']
-                title = pc['title']
-                # Check if title matches any of our target titles
-                title_match = any(
-                    target.lower() in title.lower() or title.lower() in target.lower()
-                    for target in target_titles
-                )
-                if title_match or not pc['title']:  # Include if title matches or no title found
-                    # Try to find email for this person
-                    email = None
-                    # First, check if we found a direct email for this person on the page
-                    name_parts = name.lower().split()
-                    for found_email in found_emails:
-                        if any(part in found_email.lower() for part in name_parts):
-                            email = found_email
-                            break
-                    # If no direct email, generate one
-                    if not email:
-                        email = await self._generate_verified_email(name, domain, seen_emails)
-                    if email and email.lower() not in seen_emails:
-                        # Use matched title or best guess from target titles
-                        final_title = title if title else target_titles[0]
-                        contacts.append(Contact(
-                            id=str(uuid.uuid4()),
-                            name=name,
-                            email=email,
-                            title=final_title,
-                            prospect_id=""
-                        ))
-                        seen_emails.add(email.lower())
-        except Exception as e:
-            logger.error(f"EnhancedFinder: Error extracting contacts from {url}: {str(e)}")
-        return contacts
-    async def _search_company_contacts(
         self,
         company_name: str,
         domain: str,
@@ -364,122 +217,188 @@ class EnhancedContactFinder:
         seen_emails: Set[str],
         max_needed: int
     ) -> List[Contact]:
-        """General web search for company contacts"""
         contacts = []
-        # Broader search queries
-        queries = [
-            f'{company_name} executives contact',
-            f'{company_name} leadership team',
-            f'{company_name} customer experience contact',
-            f'"{company_name}" "customer success" contact email',
         ]
-        for query in queries:
             try:
-                results = await self.search.search(query, max_results=5)
-                for result in results:
-                    # Try to extract contact info
-                    extracted = self._extract_contact_from_text(
-                        result.get('title', '') + ' ' + result.get('body', ''),
-                        target_titles,
-                        company_name
-                    )
-                    if extracted:
-                        name = extracted['name']
-                        title = extracted['title']
-                        email = await self._generate_verified_email(name, domain, seen_emails)
-                        if email and email.lower() not in seen_emails:
                             contacts.append(Contact(
                                 id=str(uuid.uuid4()),
                                 name=name,
                                 email=email,
-                                title=title,
                                 prospect_id=""
                             ))
                             seen_emails.add(email.lower())
                             if len(contacts) >= max_needed:
                                 return contacts
             except Exception as e:
-                logger.debug(f"EnhancedFinder: Search error for '{query}': {str(e)}")
                 continue
         return contacts
-    def _extract_linkedin_info(
         self,
-        result: Dict,
-        expected_title: str,
-        company_name: str
-    ) -> Optional[Dict]:
-        """Extract name and title from LinkedIn search result"""
-        text = result.get('title', '') + ' ' + result.get('body', '')
-        # LinkedIn title format: "Name - Title at Company | LinkedIn"
-        linkedin_pattern = r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s*[-–—]\s*([^|]+?)\s*(?:at|@)\s*([^|]+)'
-        match = re.search(linkedin_pattern, text)
-        if match:
-            name = match.group(1).strip()
-            title = match.group(2).strip()
-            company = match.group(3).strip()
-            # Validate that it's the right company
-            if company_name.lower() in company.lower() and self._is_valid_name(name):
-                return {
-                    'name': name,
-                    'title': title
-                }
-        # Try other patterns
-        for pattern in self.name_patterns:
-            match = re.search(pattern, text)
-            if match and len(match.groups()) >= 2:
-                name = match.group(1).strip()
-                title = match.group(2).strip()
-                if self._is_valid_name(name):
-                    return {
-                        'name': name,
-                        'title': title
-                    }
-        return None
-    def _extract_contact_from_text(
-        self,
-        text: str,
-        target_titles: List[str],
-        company_name: str
-    ) -> Optional[Dict]:
-        """Extract contact information from general text"""
         for pattern in self.name_patterns:
-            matches = re.finditer(pattern, text, re.MULTILINE)
-            for match in matches:
-                if len(match.groups()) >= 2:
-                    name = match.group(1).strip()
-                    title = match.group(2).strip()
-                    # Check if title matches our targets
-                    if self._is_valid_name(name) and any(
-                        target.lower() in title.lower() or title.lower() in target.lower()
-                        for target in target_titles
-                    ):
-                        return {
-                            'name': name,
-                            'title': title
-                        }
         return None
     def _is_valid_name(self, name: str) -> bool:
@@ -515,51 +434,6 @@ class EnhancedContactFinder:
         return True
-    async def _generate_verified_email(
-        self,
-        name: str,
-        domain: str,
-        seen_emails: Set[str]
-    ) -> Optional[str]:
-        """Generate and validate email address from name"""
-        # Clean name
-        name_clean = re.sub(r"[^a-zA-Z\s]", "", name).strip().lower()
-        parts = name_clean.split()
-        if len(parts) < 2:
-            return None
-        first = parts[0]
-        last = parts[-1]
-        # Try different email formats
-        for fmt in self.email_formats:
-            try:
-                email_prefix = fmt.format(
-                    first=first,
-                    last=last,
-                    f=first[0] if first else '',
-                    l=last[0] if last else ''
-                )
-                email = f"{email_prefix}@{domain}"
-                # Validate format
-                validated = validate_email(email, check_deliverability=False)
-                normalized = validated.normalized
-                # Check if not seen
-                if normalized.lower() not in seen_emails:
-                    # Check if not a generic email
-                    if not self._is_generic_email(email_prefix):
-                        return normalized
-            except EmailNotValidError:
-                continue
-        return None
     def _is_generic_email(self, prefix: str) -> bool:
         """Check if email prefix is generic (info, contact, etc.)"""

             r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s*\n\s*([A-Z][^,\n]+)',
         ]
+        # We do NOT estimate emails - only use verified emails found on web
+        # This list is kept for reference but not used for generation
+        self._common_email_patterns = [
             '{first}.{last}',      # first.last@domain
             '{first}{last}',       # firstlast@domain
         ]
     async def find_real_contacts(
         max_contacts: int = 3
     ) -> List[Contact]:
         """
+        Find real decision-makers with VERIFIED email addresses.
+        IMPORTANT: Only returns contacts where we found ACTUAL email addresses
+        from web sources. Does NOT generate or estimate emails.
         Returns:
+            List of Contact objects with verified names and emails only
         """
+        logger.info(f"EnhancedFinder: Finding VERIFIED contacts at '{company_name}'")
         print(f"\n[CONTACT FINDER] Starting search for {company_name}")
         print(f"[CONTACT FINDER] Domain: {domain}")
         print(f"[CONTACT FINDER] Target titles: {target_titles}")
         print(f"[CONTACT FINDER] Max contacts: {max_contacts}")
+        print(f"[CONTACT FINDER] NOTE: Only returning contacts with VERIFIED emails found on web")
         contacts = []
         seen_emails: Set[str] = set()
+        # Strategy 1: Search for actual email addresses directly
+        print(f"[CONTACT FINDER] Strategy 1: Searching for actual email addresses...")
+        email_contacts = await self._search_for_emails(
+            company_name,
+            domain,
+            target_titles,
+            seen_emails,
+            max_contacts
+        )
+        contacts.extend(email_contacts)
+        print(f"[CONTACT FINDER]   Found {len(email_contacts)} contacts with verified emails")
+        # Strategy 2: Scrape company team/contact pages for emails
         if len(contacts) < max_contacts:
+            print(f"[CONTACT FINDER] Strategy 2: Scraping company pages for contact emails...")
+            scraped_contacts = await self._scrape_for_verified_emails(
                 company_name,
                 domain,
                 target_titles,
                 seen_emails,
                 max_contacts - len(contacts)
             )
+            contacts.extend(scraped_contacts)
+            print(f"[CONTACT FINDER]   Found {len(scraped_contacts)} contacts from page scraping")
+        # Strategy 3: Search LinkedIn + news for names WITH email mentions
         if len(contacts) < max_contacts:
+            print(f"[CONTACT FINDER] Strategy 3: Searching for executives with public emails...")
+            linkedin_contacts = await self._find_contacts_with_emails(
                 company_name,
                 domain,
                 target_titles,
                 seen_emails,
                 max_contacts - len(contacts)
             )
+            contacts.extend(linkedin_contacts)
+            print(f"[CONTACT FINDER]   Found {len(linkedin_contacts)} contacts from web search")
+        logger.info(f"EnhancedFinder: Total {len(contacts)} VERIFIED contacts found for '{company_name}'")
         print(f"[CONTACT FINDER] === FINAL RESULT ===")
+        print(f"[CONTACT FINDER] Total VERIFIED contacts found: {len(contacts)}")
         for i, contact in enumerate(contacts[:max_contacts], 1):
+            print(f"[CONTACT FINDER]   {i}. {contact.name} ({contact.title}) - {contact.email} [VERIFIED]")
+        if len(contacts) == 0:
+            print(f"[CONTACT FINDER]   No contacts with verified emails found.")
+            print(f"[CONTACT FINDER]   This is normal - many companies don't publish executive emails.")
         print(f"[CONTACT FINDER] ====================\n")
         return contacts[:max_contacts]
+    async def _search_for_emails(
         self,
         company_name: str,
         domain: str,
         seen_emails: Set[str],
         max_needed: int
     ) -> List[Contact]:
+        """Search specifically for email addresses associated with company executives"""
         contacts = []
+        # Direct email search queries
+        email_queries = [
+            f'"{domain}" email CEO OR founder OR director',
+            f'"{company_name}" contact email executive',
+            f'site:{domain} email contact',
+            f'"{company_name}" "@{domain}" CEO OR VP OR director',
         ]
+        for query in email_queries:
             try:
+                print(f"[CONTACT FINDER]     Query: '{query}'")
+                results = await self.search.search(query, max_results=10)
+                for result in results:
+                    text = result.get('title', '') + ' ' + result.get('body', '')
+                    # Extract emails from text
+                    found_emails = self._extract_emails_from_text(text, domain)
+                    for email in found_emails:
+                        if email.lower() not in seen_emails and not self._is_generic_email(email.split('@')[0]):
+                            # Try to find associated name and title
+                            name, title = self._extract_name_near_email(text, email, target_titles)
+                            if name:
+                                contacts.append(Contact(
+                                    id=str(uuid.uuid4()),
+                                    name=name,
+                                    email=email,
+                                    title=title or "Executive",
+                                    prospect_id=""
+                                ))
+                                seen_emails.add(email.lower())
+                                print(f"[CONTACT FINDER]     ✓ FOUND: {name} - {email}")
+                                if len(contacts) >= max_needed:
+                                    return contacts
             except Exception as e:
+                logger.debug(f"Email search error: {str(e)}")
                 continue
         return contacts
+    async def _scrape_for_verified_emails(
         self,
         company_name: str,
         domain: str,
         seen_emails: Set[str],
         max_needed: int
     ) -> List[Contact]:
+        """Scrape company pages to find actual email addresses"""
         contacts = []
+        # Pages likely to have contact info
+        pages_to_check = [
+            f"https://{domain}/contact",
+            f"https://{domain}/contact-us",
+            f"https://{domain}/about",
+            f"https://{domain}/about-us",
+            f"https://{domain}/team",
+            f"https://{domain}/leadership",
+            f"https://{domain}/our-team",
+            f"https://www.{domain}/contact",
+            f"https://www.{domain}/about",
+            f"https://www.{domain}/team",
         ]
+        for url in pages_to_check:
             try:
+                page_content = await self.scraper.scrape_page(url)
+                if not page_content:
+                    continue
+                text = page_content.get('text', '')
+                # Find all emails on page
+                found_emails = self._extract_emails_from_text(text, domain)
+                for email in found_emails:
+                    if email.lower() not in seen_emails and not self._is_generic_email(email.split('@')[0]):
+                        # Try to find associated name
+                        name, title = self._extract_name_near_email(text, email, target_titles)
+                        if name:
                             contacts.append(Contact(
                                 id=str(uuid.uuid4()),
                                 name=name,
                                 email=email,
+                                title=title or "Contact",
                                 prospect_id=""
                             ))
                             seen_emails.add(email.lower())
+                            print(f"[CONTACT FINDER]     ✓ SCRAPED: {name} - {email} from {url}")
                             if len(contacts) >= max_needed:
                                 return contacts
             except Exception as e:
+                logger.debug(f"Scrape error for {url}: {str(e)}")
                 continue
         return contacts
async def _find_contacts_with_emails(
    self,
    company_name: str,
    domain: str,
    target_titles: List[str],
    seen_emails: Set[str],
    max_needed: int
) -> List[Contact]:
    """Search for people with the target titles, keeping only those with an email.

    For every title, a few search queries are issued through
    ``self.search.search``. A search result produces contacts only when an
    email address is actually present in its title/body text and a person's
    name can be extracted from the same text.

    Args:
        company_name: Company name used in the queries and to reject
            "names" that are really the company.
        domain: Company domain; passed to the email extractor.
        target_titles: Job titles to search for, in priority order.
        seen_emails: Lower-cased emails already collected. This set is
            updated in place with every email accepted here.
        max_needed: Maximum number of contacts to return.

    Returns:
        Up to ``max_needed`` new contacts (empty when ``max_needed <= 0``).
    """
    contacts = []
    # Without this guard the first hit would be appended before the cap is
    # checked, returning one contact even when none were requested.
    if max_needed <= 0:
        return contacts

    for title in target_titles:
        # Search for person WITH email mention
        queries = [
            f'"{company_name}" {title} email "@{domain}"',
            f'"{company_name}" {title} contact email',
            f'site:linkedin.com "{company_name}" {title} email',
        ]
        for query in queries:
            try:
                results = await self.search.search(query, max_results=5)
                for result in results or []:
                    # Fields may be present but None; concatenating None
                    # would raise and discard the rest of this query.
                    text = f"{result.get('title') or ''} {result.get('body') or ''}"

                    # The name depends only on the result text, so it is
                    # looked up at most once per result, and only when an
                    # acceptable email was actually found.
                    name = None
                    for email in self._extract_emails_from_text(text, domain):
                        email_key = email.lower()
                        if email_key in seen_emails:
                            continue
                        if self._is_generic_email(email.split('@')[0]):
                            continue

                        if name is None:
                            name = self._extract_name_from_text(text, company_name)
                            if not name:
                                break  # no person in this result

                        # NOTE(review): every email in the same result is
                        # attributed to the same extracted name — confirm
                        # that this is intended.
                        contacts.append(Contact(
                            id=str(uuid.uuid4()),
                            name=name,
                            email=email,
                            title=title,
                            prospect_id=""
                        ))
                        seen_emails.add(email_key)
                        print(f"[CONTACT FINDER]     ✓ FOUND: {name} ({title}) - {email}")
                        if len(contacts) >= max_needed:
                            return contacts
            except Exception as e:
                # Best effort: a failing query must not abort the others.
                logger.debug(f"Search error: {str(e)}")
                continue
    return contacts
+    def _extract_emails_from_text(self, text: str, domain: str) -> List[str]:
+        """Extract email addresses from text, prioritizing company domain"""
+        if not text:
+            return []
+        # Find all emails
+        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
+        all_emails = re.findall(email_pattern, text, re.IGNORECASE)
+        # Prioritize company domain emails
+        company_emails = [e for e in all_emails if domain.lower() in e.lower()]
+        # Filter out junk
+        filtered = []
+        ignore_patterns = ['example.com', 'domain.com', 'email.com', 'test.com', 'sample.com',
+                          'noreply', 'no-reply', 'donotreply', 'unsubscribe', 'privacy',
+                          'support@', 'info@', 'contact@', 'hello@', 'sales@', 'help@']
+        for email in company_emails:
+            if not any(pattern in email.lower() for pattern in ignore_patterns):
+                filtered.append(email.lower())
+        return list(set(filtered))
+    def _extract_name_near_email(self, text: str, email: str, target_titles: List[str]) -> tuple:
+        """Extract name that appears near an email address"""
+        if not text or not email:
+            return None, None
+        # Find context around email (200 chars before and after)
+        email_pos = text.lower().find(email.lower())
+        if email_pos == -1:
+            return None, None
+        start = max(0, email_pos - 200)
+        end = min(len(text), email_pos + len(email) + 200)
+        context = text[start:end]
+        # Look for name patterns in context
+        name = None
+        title = None
+        # Try to find name-title patterns
         for pattern in self.name_patterns:
+            match = re.search(pattern, context)
+            if match:
+                potential_name = match.group(1).strip()
+                if self._is_valid_name(potential_name):
+                    name = potential_name
+                    if len(match.groups()) > 1:
+                        title = match.group(2).strip()
+                    break
+        # If no name found, try simpler extraction
+        if not name:
+            # Look for capitalized name-like words near email
+            words = context.split()
+            for i, word in enumerate(words):
+                if word and word[0].isupper() and len(word) > 2:
+                    if i + 1 < len(words) and words[i+1] and words[i+1][0].isupper():
+                        potential_name = f"{word} {words[i+1]}"
+                        if self._is_valid_name(potential_name):
+                            name = potential_name
+                            break
+        return name, title
+    def _extract_name_from_text(self, text: str, company_name: str) -> Optional[str]:
+        """Extract a person's name from text"""
+        for pattern in self.name_patterns:
+            match = re.search(pattern, text)
+            if match:
+                name = match.group(1).strip()
+                if self._is_valid_name(name) and company_name.lower() not in name.lower():
+                    return name
         return None
     def _is_valid_name(self, name: str) -> bool:
         """Report whether ``name`` is acceptable as a person's name.

         As shown here the check is unconditional: every candidate is accepted.
         """
         # NOTE(review): this excerpt comes from a diff view, so lines between
         # the signature and this return may have been elided — confirm against
         # the full file before relying on the always-True behaviour.
         return True
     def _is_generic_email(self, prefix: str) -> bool:
         """Check if email prefix is generic (info, contact, etc.)"""