muzakkirhussain011 committed on
Commit
52cf520
·
1 Parent(s): 819d651

Add application files

Browse files
app.py CHANGED
@@ -590,19 +590,21 @@ def get_contacts_html() -> str:
590
  """
591
 
592
  html = """
593
- <div style="background: var(--info-bg, #d1ecf1); border: 1px solid var(--info-border, #bee5eb); border-radius: 8px; padding: 12px 16px; margin-bottom: 16px;">
594
- <div style="font-size: 13px; color: var(--info-text, #0c5460);">
595
- <strong>ℹ️ Contact Sources:</strong> Names are sourced from LinkedIn searches and company team pages.
596
- Email addresses use common business formats. Contacts marked <strong>VERIFIED</strong> were found in public web sources.
597
  </div>
598
  </div>
599
  """
600
  for c in reversed(knowledge_base["contacts"]):
601
- is_verified = c.get("verified", False)
602
- source = c.get("source", "unknown")
603
- verified_badge = "VERIFIED" if is_verified else "ESTIMATED"
604
- badge_class = "badge-engaged" if is_verified else "badge-researched"
605
- source_text = "from web search" if is_verified else "estimated"
 
 
606
  html += f"""
607
  <div class="prospect-card" style="padding: 16px 20px;">
608
  <div style="display: flex; justify-content: space-between; align-items: center;">
@@ -610,10 +612,11 @@ def get_contacts_html() -> str:
610
  <div style="font-weight: 600; color: var(--text-primary);">👤 {c.get("name", "Unknown")}</div>
611
  <div style="font-size: 13px; color: var(--text-secondary); margin-top: 4px;">{c.get("title", "Unknown title")}</div>
612
  <div style="font-size: 13px; color: var(--text-secondary);">🏢 {c.get("company", "Unknown company")}</div>
613
- {f'<div style="font-size: 13px; color: var(--primary-blue); margin-top: 4px;">📧 {c.get("email")} <span style="font-size: 11px; color: var(--text-secondary);">({source_text})</span></div>' if c.get("email") else ''}
614
  </div>
615
- <span class="prospect-card-badge {badge_class}">{verified_badge}</span>
616
  </div>
 
617
  </div>
618
  """
619
  return html
@@ -1138,12 +1141,12 @@ After processing {num_prospects} prospects, provide summary:
1138
  output += f" - {p.get('summary')[:150]}...\n" if len(p.get('summary', '')) > 150 else f" - {p.get('summary')}\n"
1139
 
1140
  if contacts_found:
1141
- output += "\n### 👥 Decision Makers Found\n\n"
1142
- output += "> ⚠️ **Note:** Names are from web searches. Emails are *estimated* ([email protected] format). Please verify before outreach.\n\n"
1143
  for c in contacts_found:
1144
  output += f"- **{c.get('name', 'Unknown')}** - {c.get('title', 'Unknown')} at {c.get('company', 'Unknown')}\n"
1145
  if c.get('email'):
1146
- output += f" - Email: {c.get('email')} *(estimated)*\n"
1147
 
1148
  if emails_drafted:
1149
  output += "\n### ✉️ Emails Drafted\n\n"
 
590
  """
591
 
592
  html = """
593
+ <div style="background: var(--success-bg, #d4edda); border: 1px solid var(--success-border, #c3e6cb); border-radius: 8px; padding: 12px 16px; margin-bottom: 16px;">
594
+ <div style="font-size: 13px; color: var(--success-text, #155724);">
595
+ <strong>✅ Verified Contacts:</strong> All contacts shown here were found through web searches of LinkedIn profiles,
596
+ company team pages, and public directories. Only contacts with <strong>verified email addresses</strong> found on the web are displayed.
597
  </div>
598
  </div>
599
  """
600
  for c in reversed(knowledge_base["contacts"]):
601
+ source = c.get("source", "web_search")
602
+ source_label = {
603
+ "web_search": "Found via web search",
604
+ "linkedin": "Found via LinkedIn",
605
+ "team_page": "Found on company page",
606
+ "web_search_and_scraping": "Verified from web"
607
+ }.get(source, "Verified")
608
  html += f"""
609
  <div class="prospect-card" style="padding: 16px 20px;">
610
  <div style="display: flex; justify-content: space-between; align-items: center;">
 
612
  <div style="font-weight: 600; color: var(--text-primary);">👤 {c.get("name", "Unknown")}</div>
613
  <div style="font-size: 13px; color: var(--text-secondary); margin-top: 4px;">{c.get("title", "Unknown title")}</div>
614
  <div style="font-size: 13px; color: var(--text-secondary);">🏢 {c.get("company", "Unknown company")}</div>
615
+ {f'<div style="font-size: 13px; color: var(--primary-blue); margin-top: 4px;">📧 {c.get("email")}</div>' if c.get("email") else ''}
616
  </div>
617
+ <span class="prospect-card-badge badge-engaged">VERIFIED</span>
618
  </div>
619
+ <div style="font-size: 11px; color: var(--text-secondary); margin-top: 8px;">{source_label}</div>
620
  </div>
621
  """
622
  return html
 
1141
  output += f" - {p.get('summary')[:150]}...\n" if len(p.get('summary', '')) > 150 else f" - {p.get('summary')}\n"
1142
 
1143
  if contacts_found:
1144
+ output += "\n### 👥 Verified Decision Makers Found\n\n"
1145
+ output += "> **Verified:** These contacts were found through web searches of LinkedIn, company pages, and public sources.\n\n"
1146
  for c in contacts_found:
1147
  output += f"- **{c.get('name', 'Unknown')}** - {c.get('title', 'Unknown')} at {c.get('company', 'Unknown')}\n"
1148
  if c.get('email'):
1149
+ output += f" - Email: {c.get('email')} *(verified)*\n"
1150
 
1151
  if emails_drafted:
1152
  output += "\n### ✉️ Emails Drafted\n\n"
services/__pycache__/enhanced_contact_finder.cpython-310.pyc CHANGED
Binary files a/services/__pycache__/enhanced_contact_finder.cpython-310.pyc and b/services/__pycache__/enhanced_contact_finder.cpython-310.pyc differ
 
services/enhanced_contact_finder.py CHANGED
@@ -72,15 +72,11 @@ class EnhancedContactFinder:
72
  r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s*\n\s*([A-Z][^,\n]+)',
73
  ]
74
 
75
- # Common email formats to try
76
- self.email_formats = [
 
77
  '{first}.{last}', # [email protected]
78
  '{first}{last}', # [email protected]
79
- '{first}_{last}', # [email protected]
80
- '{first}-{last}', # [email protected]
81
- '{first}', # [email protected]
82
- '{f}{last}', # [email protected]
83
- '{first}{l}', # [email protected]
84
  ]
85
 
86
  async def find_real_contacts(
@@ -91,123 +87,74 @@ class EnhancedContactFinder:
91
  max_contacts: int = 3
92
  ) -> List[Contact]:
93
  """
94
- Find real decision-makers using multiple strategies
 
 
 
95
 
96
  Returns:
97
- List of Contact objects with real names and emails
98
  """
99
- logger.info(f"EnhancedFinder: Finding real contacts at '{company_name}'")
100
  print(f"\n[CONTACT FINDER] Starting search for {company_name}")
101
  print(f"[CONTACT FINDER] Domain: {domain}")
102
  print(f"[CONTACT FINDER] Target titles: {target_titles}")
103
  print(f"[CONTACT FINDER] Max contacts: {max_contacts}")
 
104
 
105
  contacts = []
106
  seen_emails: Set[str] = set()
107
 
108
- # Strategy 1: Search LinkedIn profiles for each title
109
- print(f"[CONTACT FINDER] Strategy 1: Searching LinkedIn profiles...")
110
- for title in target_titles[:max_contacts * 2]: # Search more than needed
111
- print(f"[CONTACT FINDER] Searching for: {title}")
112
- linkedin_contact = await self._find_linkedin_contact(
113
- company_name,
114
- title,
115
- domain,
116
- seen_emails
117
- )
118
- if linkedin_contact:
119
- contacts.append(linkedin_contact)
120
- seen_emails.add(linkedin_contact.email.lower())
121
- logger.info(f"EnhancedFinder: Found {title} via LinkedIn search")
122
- print(f"[CONTACT FINDER] ✓ FOUND: {linkedin_contact.name} ({linkedin_contact.title}) - {linkedin_contact.email}")
123
-
124
- if len(contacts) >= max_contacts:
125
- print(f"[CONTACT FINDER] Found enough contacts ({len(contacts)}), returning early")
126
- return contacts
127
- else:
128
- print(f"[CONTACT FINDER] ✗ Not found via LinkedIn")
129
-
130
- # Strategy 2: Scrape company team pages
131
  if len(contacts) < max_contacts:
132
- team_contacts = await self._scrape_team_pages(
 
133
  company_name,
134
  domain,
135
  target_titles,
136
  seen_emails,
137
  max_contacts - len(contacts)
138
  )
139
- contacts.extend(team_contacts)
140
- logger.info(f"EnhancedFinder: Found {len(team_contacts)} contacts from team pages")
141
 
142
- # Strategy 3: General web search for contact info
143
  if len(contacts) < max_contacts:
144
- web_contacts = await self._search_company_contacts(
 
145
  company_name,
146
  domain,
147
  target_titles,
148
  seen_emails,
149
  max_contacts - len(contacts)
150
  )
151
- contacts.extend(web_contacts)
152
- logger.info(f"EnhancedFinder: Found {len(web_contacts)} contacts from web search")
153
 
154
- logger.info(f"EnhancedFinder: Total {len(contacts)} real contacts found for '{company_name}'")
155
  print(f"[CONTACT FINDER] === FINAL RESULT ===")
156
- print(f"[CONTACT FINDER] Total contacts found: {len(contacts)}")
157
  for i, contact in enumerate(contacts[:max_contacts], 1):
158
- print(f"[CONTACT FINDER] {i}. {contact.name} ({contact.title}) - {contact.email}")
 
 
 
159
  print(f"[CONTACT FINDER] ====================\n")
160
  return contacts[:max_contacts]
161
 
162
- async def _find_linkedin_contact(
163
- self,
164
- company_name: str,
165
- title: str,
166
- domain: str,
167
- seen_emails: Set[str]
168
- ) -> Optional[Contact]:
169
- """Search LinkedIn specifically for decision-makers"""
170
-
171
- # LinkedIn-specific search queries
172
- queries = [
173
- f'site:linkedin.com/in {title} at {company_name}',
174
- f'linkedin {company_name} {title}',
175
- f'"{title}" "{company_name}" linkedin.com',
176
- ]
177
-
178
- for query in queries:
179
- try:
180
- print(f"[CONTACT FINDER] Query: '{query}'")
181
- results = await self.search.search(query, max_results=5)
182
- print(f"[CONTACT FINDER] Results: {len(results)} found")
183
-
184
- for result in results:
185
- # Extract name and title from LinkedIn result
186
- contact_info = self._extract_linkedin_info(result, title, company_name)
187
-
188
- if contact_info and contact_info.get('name'):
189
- name = contact_info['name']
190
- detected_title = contact_info.get('title', title)
191
-
192
- # Generate email from name
193
- email = await self._generate_verified_email(name, domain, seen_emails)
194
-
195
- if email:
196
- return Contact(
197
- id=str(uuid.uuid4()),
198
- name=name,
199
- email=email,
200
- title=detected_title,
201
- prospect_id=""
202
- )
203
-
204
- except Exception as e:
205
- logger.debug(f"EnhancedFinder: LinkedIn search error for '{query}': {str(e)}")
206
- continue
207
-
208
- return None
209
-
210
- async def _scrape_team_pages(
211
  self,
212
  company_name: str,
213
  domain: str,
@@ -215,148 +162,54 @@ class EnhancedContactFinder:
215
  seen_emails: Set[str],
216
  max_needed: int
217
  ) -> List[Contact]:
218
- """Scrape company team/about pages for contact information"""
219
-
220
  contacts = []
221
 
222
- # Try to find team page URLs
223
- team_page_queries = [
224
- f'site:{domain} team',
225
- f'site:{domain} leadership',
226
- f'site:{domain} about us',
227
- f'{company_name} team page',
228
  ]
229
 
230
- team_page_urls = set()
231
-
232
- for query in team_page_queries:
233
  try:
234
- results = await self.search.search(query, max_results=3)
235
- for result in results:
236
- url = result.get('url', '')
237
- if domain in url and any(pattern in url.lower() for pattern in self.team_page_patterns):
238
- team_page_urls.add(url)
239
- except Exception as e:
240
- logger.debug(f"EnhancedFinder: Team page search error: {str(e)}")
241
- continue
242
-
243
- # Also try common team page URLs directly
244
- for pattern in self.team_page_patterns[:3]: # Try top 3 patterns
245
- team_page_urls.add(f"https://{domain}{pattern}")
246
- team_page_urls.add(f"https://www.{domain}{pattern}")
247
 
248
- # Scrape each team page
249
- for url in list(team_page_urls)[:5]: # Limit to 5 pages
250
- try:
251
- page_contacts = await self._extract_contacts_from_page(
252
- url,
253
- company_name,
254
- domain,
255
- target_titles,
256
- seen_emails
257
- )
258
-
259
- for contact in page_contacts:
260
- if contact.email.lower() not in seen_emails:
261
- contacts.append(contact)
262
- seen_emails.add(contact.email.lower())
263
-
264
- if len(contacts) >= max_needed:
265
- return contacts
 
 
 
 
 
 
266
 
267
  except Exception as e:
268
- logger.debug(f"EnhancedFinder: Error scraping {url}: {str(e)}")
269
  continue
270
 
271
  return contacts
272
 
273
- async def _extract_contacts_from_page(
274
- self,
275
- url: str,
276
- company_name: str,
277
- domain: str,
278
- target_titles: List[str],
279
- seen_emails: Set[str]
280
- ) -> List[Contact]:
281
- """Extract contact information from a webpage"""
282
-
283
- contacts = []
284
-
285
- try:
286
- # Scrape the page
287
- page_content = await self.scraper.scrape_page(url)
288
-
289
- if not page_content:
290
- return contacts
291
-
292
- # Extract all text
293
- text = page_content.get('text', '')
294
-
295
- # Find all potential contacts using regex patterns
296
- potential_contacts = []
297
-
298
- for pattern in self.name_patterns:
299
- matches = re.finditer(pattern, text, re.MULTILINE)
300
- for match in matches:
301
- name = match.group(1).strip()
302
- title = match.group(2).strip() if len(match.groups()) > 1 else ""
303
-
304
- # Validate name
305
- if self._is_valid_name(name):
306
- potential_contacts.append({
307
- 'name': name,
308
- 'title': title
309
- })
310
-
311
- # Also look for email addresses directly on the page
312
- email_pattern = r'\b[A-Za-z0-9._%+-]+@' + re.escape(domain) + r'\b'
313
- found_emails = re.findall(email_pattern, text, re.IGNORECASE)
314
-
315
- # Match contacts with titles we're looking for
316
- for pc in potential_contacts:
317
- name = pc['name']
318
- title = pc['title']
319
-
320
- # Check if title matches any of our target titles
321
- title_match = any(
322
- target.lower() in title.lower() or title.lower() in target.lower()
323
- for target in target_titles
324
- )
325
-
326
- if title_match or not pc['title']: # Include if title matches or no title found
327
- # Try to find email for this person
328
- email = None
329
-
330
- # First, check if we found a direct email for this person on the page
331
- name_parts = name.lower().split()
332
- for found_email in found_emails:
333
- if any(part in found_email.lower() for part in name_parts):
334
- email = found_email
335
- break
336
-
337
- # If no direct email, generate one
338
- if not email:
339
- email = await self._generate_verified_email(name, domain, seen_emails)
340
-
341
- if email and email.lower() not in seen_emails:
342
- # Use matched title or best guess from target titles
343
- final_title = title if title else target_titles[0]
344
-
345
- contacts.append(Contact(
346
- id=str(uuid.uuid4()),
347
- name=name,
348
- email=email,
349
- title=final_title,
350
- prospect_id=""
351
- ))
352
- seen_emails.add(email.lower())
353
-
354
- except Exception as e:
355
- logger.error(f"EnhancedFinder: Error extracting contacts from {url}: {str(e)}")
356
-
357
- return contacts
358
-
359
- async def _search_company_contacts(
360
  self,
361
  company_name: str,
362
  domain: str,
@@ -364,122 +217,188 @@ class EnhancedContactFinder:
364
  seen_emails: Set[str],
365
  max_needed: int
366
  ) -> List[Contact]:
367
- """General web search for company contacts"""
368
-
369
  contacts = []
370
 
371
- # Broader search queries
372
- queries = [
373
- f'{company_name} executives contact',
374
- f'{company_name} leadership team',
375
- f'{company_name} customer experience contact',
376
- f'"{company_name}" "customer success" contact email',
 
 
 
 
 
 
377
  ]
378
 
379
- for query in queries:
380
  try:
381
- results = await self.search.search(query, max_results=5)
 
 
382
 
383
- for result in results:
384
- # Try to extract contact info
385
- extracted = self._extract_contact_from_text(
386
- result.get('title', '') + ' ' + result.get('body', ''),
387
- target_titles,
388
- company_name
389
- )
390
 
391
- if extracted:
392
- name = extracted['name']
393
- title = extracted['title']
394
 
395
- email = await self._generate_verified_email(name, domain, seen_emails)
 
 
 
396
 
397
- if email and email.lower() not in seen_emails:
398
  contacts.append(Contact(
399
  id=str(uuid.uuid4()),
400
  name=name,
401
  email=email,
402
- title=title,
403
  prospect_id=""
404
  ))
405
  seen_emails.add(email.lower())
 
406
 
407
  if len(contacts) >= max_needed:
408
  return contacts
409
 
410
  except Exception as e:
411
- logger.debug(f"EnhancedFinder: Search error for '{query}': {str(e)}")
412
  continue
413
 
414
  return contacts
415
 
416
- def _extract_linkedin_info(
417
  self,
418
- result: Dict,
419
- expected_title: str,
420
- company_name: str
421
- ) -> Optional[Dict]:
422
- """Extract name and title from LinkedIn search result"""
423
-
424
- text = result.get('title', '') + ' ' + result.get('body', '')
425
-
426
- # LinkedIn title format: "Name - Title at Company | LinkedIn"
427
- linkedin_pattern = r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s*[-–—]\s*([^|]+?)\s*(?:at|@)\s*([^|]+)'
428
- match = re.search(linkedin_pattern, text)
429
-
430
- if match:
431
- name = match.group(1).strip()
432
- title = match.group(2).strip()
433
- company = match.group(3).strip()
434
-
435
- # Validate that it's the right company
436
- if company_name.lower() in company.lower() and self._is_valid_name(name):
437
- return {
438
- 'name': name,
439
- 'title': title
440
- }
441
-
442
- # Try other patterns
443
- for pattern in self.name_patterns:
444
- match = re.search(pattern, text)
445
- if match and len(match.groups()) >= 2:
446
- name = match.group(1).strip()
447
- title = match.group(2).strip()
448
 
449
- if self._is_valid_name(name):
450
- return {
451
- 'name': name,
452
- 'title': title
453
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454
 
455
- return None
456
 
457
- def _extract_contact_from_text(
458
- self,
459
- text: str,
460
- target_titles: List[str],
461
- company_name: str
462
- ) -> Optional[Dict]:
463
- """Extract contact information from general text"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  for pattern in self.name_patterns:
466
- matches = re.finditer(pattern, text, re.MULTILINE)
467
-
468
- for match in matches:
469
- if len(match.groups()) >= 2:
470
- name = match.group(1).strip()
471
- title = match.group(2).strip()
472
-
473
- # Check if title matches our targets
474
- if self._is_valid_name(name) and any(
475
- target.lower() in title.lower() or title.lower() in target.lower()
476
- for target in target_titles
477
- ):
478
- return {
479
- 'name': name,
480
- 'title': title
481
- }
 
 
 
 
 
 
482
 
 
 
 
 
 
 
 
 
483
  return None
484
 
485
  def _is_valid_name(self, name: str) -> bool:
@@ -515,51 +434,6 @@ class EnhancedContactFinder:
515
 
516
  return True
517
 
518
- async def _generate_verified_email(
519
- self,
520
- name: str,
521
- domain: str,
522
- seen_emails: Set[str]
523
- ) -> Optional[str]:
524
- """Generate and validate email address from name"""
525
-
526
- # Clean name
527
- name_clean = re.sub(r"[^a-zA-Z\s]", "", name).strip().lower()
528
- parts = name_clean.split()
529
-
530
- if len(parts) < 2:
531
- return None
532
-
533
- first = parts[0]
534
- last = parts[-1]
535
-
536
- # Try different email formats
537
- for fmt in self.email_formats:
538
- try:
539
- email_prefix = fmt.format(
540
- first=first,
541
- last=last,
542
- f=first[0] if first else '',
543
- l=last[0] if last else ''
544
- )
545
-
546
- email = f"{email_prefix}@{domain}"
547
-
548
- # Validate format
549
- validated = validate_email(email, check_deliverability=False)
550
- normalized = validated.normalized
551
-
552
- # Check if not seen
553
- if normalized.lower() not in seen_emails:
554
- # Check if not a generic email
555
- if not self._is_generic_email(email_prefix):
556
- return normalized
557
-
558
- except EmailNotValidError:
559
- continue
560
-
561
- return None
562
-
563
  def _is_generic_email(self, prefix: str) -> bool:
564
  """Check if email prefix is generic (info, contact, etc.)"""
565
 
 
72
  r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s*\n\s*([A-Z][^,\n]+)',
73
  ]
74
 
75
+ # We do NOT estimate emails - only use verified emails found on web
76
+ # This list is kept for reference but not used for generation
77
+ self._common_email_patterns = [
78
  '{first}.{last}', # first.last@domain
79
  '{first}{last}', # firstlast@domain
 
 
 
 
 
80
  ]
81
 
82
  async def find_real_contacts(
 
87
  max_contacts: int = 3
88
  ) -> List[Contact]:
89
  """
90
+ Find real decision-makers with VERIFIED email addresses.
91
+
92
+ IMPORTANT: Only returns contacts where we found ACTUAL email addresses
93
+ from web sources. Does NOT generate or estimate emails.
94
 
95
  Returns:
96
+ List of Contact objects with verified names and emails only
97
  """
98
+ logger.info(f"EnhancedFinder: Finding VERIFIED contacts at '{company_name}'")
99
  print(f"\n[CONTACT FINDER] Starting search for {company_name}")
100
  print(f"[CONTACT FINDER] Domain: {domain}")
101
  print(f"[CONTACT FINDER] Target titles: {target_titles}")
102
  print(f"[CONTACT FINDER] Max contacts: {max_contacts}")
103
+ print(f"[CONTACT FINDER] NOTE: Only returning contacts with VERIFIED emails found on web")
104
 
105
  contacts = []
106
  seen_emails: Set[str] = set()
107
 
108
+ # Strategy 1: Search for actual email addresses directly
109
+ print(f"[CONTACT FINDER] Strategy 1: Searching for actual email addresses...")
110
+ email_contacts = await self._search_for_emails(
111
+ company_name,
112
+ domain,
113
+ target_titles,
114
+ seen_emails,
115
+ max_contacts
116
+ )
117
+ contacts.extend(email_contacts)
118
+ print(f"[CONTACT FINDER] Found {len(email_contacts)} contacts with verified emails")
119
+
120
+ # Strategy 2: Scrape company team/contact pages for emails
 
 
 
 
 
 
 
 
 
 
121
  if len(contacts) < max_contacts:
122
+ print(f"[CONTACT FINDER] Strategy 2: Scraping company pages for contact emails...")
123
+ scraped_contacts = await self._scrape_for_verified_emails(
124
  company_name,
125
  domain,
126
  target_titles,
127
  seen_emails,
128
  max_contacts - len(contacts)
129
  )
130
+ contacts.extend(scraped_contacts)
131
+ print(f"[CONTACT FINDER] Found {len(scraped_contacts)} contacts from page scraping")
132
 
133
+ # Strategy 3: Search LinkedIn + news for names WITH email mentions
134
  if len(contacts) < max_contacts:
135
+ print(f"[CONTACT FINDER] Strategy 3: Searching for executives with public emails...")
136
+ linkedin_contacts = await self._find_contacts_with_emails(
137
  company_name,
138
  domain,
139
  target_titles,
140
  seen_emails,
141
  max_contacts - len(contacts)
142
  )
143
+ contacts.extend(linkedin_contacts)
144
+ print(f"[CONTACT FINDER] Found {len(linkedin_contacts)} contacts from web search")
145
 
146
+ logger.info(f"EnhancedFinder: Total {len(contacts)} VERIFIED contacts found for '{company_name}'")
147
  print(f"[CONTACT FINDER] === FINAL RESULT ===")
148
+ print(f"[CONTACT FINDER] Total VERIFIED contacts found: {len(contacts)}")
149
  for i, contact in enumerate(contacts[:max_contacts], 1):
150
+ print(f"[CONTACT FINDER] {i}. {contact.name} ({contact.title}) - {contact.email} [VERIFIED]")
151
+ if len(contacts) == 0:
152
+ print(f"[CONTACT FINDER] No contacts with verified emails found.")
153
+ print(f"[CONTACT FINDER] This is normal - many companies don't publish executive emails.")
154
  print(f"[CONTACT FINDER] ====================\n")
155
  return contacts[:max_contacts]
156
 
157
+ async def _search_for_emails(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  self,
159
  company_name: str,
160
  domain: str,
 
162
  seen_emails: Set[str],
163
  max_needed: int
164
  ) -> List[Contact]:
165
+ """Search specifically for email addresses associated with company executives"""
 
166
  contacts = []
167
 
168
+ # Direct email search queries
169
+ email_queries = [
170
+ f'"{domain}" email CEO OR founder OR director',
171
+ f'"{company_name}" contact email executive',
172
+ f'site:{domain} email contact',
173
+ f'"{company_name}" "@{domain}" CEO OR VP OR director',
174
  ]
175
 
176
+ for query in email_queries:
 
 
177
  try:
178
+ print(f"[CONTACT FINDER] Query: '{query}'")
179
+ results = await self.search.search(query, max_results=10)
 
 
 
 
 
 
 
 
 
 
 
180
 
181
+ for result in results:
182
+ text = result.get('title', '') + ' ' + result.get('body', '')
183
+
184
+ # Extract emails from text
185
+ found_emails = self._extract_emails_from_text(text, domain)
186
+
187
+ for email in found_emails:
188
+ if email.lower() not in seen_emails and not self._is_generic_email(email.split('@')[0]):
189
+ # Try to find associated name and title
190
+ name, title = self._extract_name_near_email(text, email, target_titles)
191
+
192
+ if name:
193
+ contacts.append(Contact(
194
+ id=str(uuid.uuid4()),
195
+ name=name,
196
+ email=email,
197
+ title=title or "Executive",
198
+ prospect_id=""
199
+ ))
200
+ seen_emails.add(email.lower())
201
+ print(f"[CONTACT FINDER] ✓ FOUND: {name} - {email}")
202
+
203
+ if len(contacts) >= max_needed:
204
+ return contacts
205
 
206
  except Exception as e:
207
+ logger.debug(f"Email search error: {str(e)}")
208
  continue
209
 
210
  return contacts
211
 
212
+ async def _scrape_for_verified_emails(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  self,
214
  company_name: str,
215
  domain: str,
 
217
  seen_emails: Set[str],
218
  max_needed: int
219
  ) -> List[Contact]:
220
+ """Scrape company pages to find actual email addresses"""
 
221
  contacts = []
222
 
223
+ # Pages likely to have contact info
224
+ pages_to_check = [
225
+ f"https://{domain}/contact",
226
+ f"https://{domain}/contact-us",
227
+ f"https://{domain}/about",
228
+ f"https://{domain}/about-us",
229
+ f"https://{domain}/team",
230
+ f"https://{domain}/leadership",
231
+ f"https://{domain}/our-team",
232
+ f"https://www.{domain}/contact",
233
+ f"https://www.{domain}/about",
234
+ f"https://www.{domain}/team",
235
  ]
236
 
237
+ for url in pages_to_check:
238
  try:
239
+ page_content = await self.scraper.scrape_page(url)
240
+ if not page_content:
241
+ continue
242
 
243
+ text = page_content.get('text', '')
 
 
 
 
 
 
244
 
245
+ # Find all emails on page
246
+ found_emails = self._extract_emails_from_text(text, domain)
 
247
 
248
+ for email in found_emails:
249
+ if email.lower() not in seen_emails and not self._is_generic_email(email.split('@')[0]):
250
+ # Try to find associated name
251
+ name, title = self._extract_name_near_email(text, email, target_titles)
252
 
253
+ if name:
254
  contacts.append(Contact(
255
  id=str(uuid.uuid4()),
256
  name=name,
257
  email=email,
258
+ title=title or "Contact",
259
  prospect_id=""
260
  ))
261
  seen_emails.add(email.lower())
262
+ print(f"[CONTACT FINDER] ✓ SCRAPED: {name} - {email} from {url}")
263
 
264
  if len(contacts) >= max_needed:
265
  return contacts
266
 
267
  except Exception as e:
268
+ logger.debug(f"Scrape error for {url}: {str(e)}")
269
  continue
270
 
271
  return contacts
272
 
273
async def _find_contacts_with_emails(
    self,
    company_name: str,
    domain: str,
    target_titles: List[str],
    seen_emails: Set[str],
    max_needed: int
) -> List[Contact]:
    """Search for executives and only return those with verified emails.

    For every title in ``target_titles`` three search queries are issued via
    ``self.search.search``. Each result's title and body are scanned with
    ``_extract_emails_from_text``; a contact is created only when the text
    contains both a usable (unseen, non-generic) e-mail address and a person
    name found by ``_extract_name_from_text``.

    At most ONE contact is created per search result: a single extracted
    name cannot be trusted to belong to several different addresses, so the
    address whose local part mentions the name is preferred and the first
    candidate is used otherwise.

    Args:
        company_name: Company name used in the queries and excluded from
            name candidates.
        domain: Company e-mail domain used in the queries and for filtering.
        target_titles: Job titles to search for; the matching title is
            stored on the created contact.
        seen_emails: Lower-cased addresses already used. Mutated in place:
            every accepted address is added.
        max_needed: Stop and return as soon as this many contacts exist.

    Returns:
        The contacts found, possibly empty.
    """
    contacts: List[Contact] = []

    for title in target_titles:
        # Search for person WITH email mention
        queries = [
            f'"{company_name}" {title} email "@{domain}"',
            f'"{company_name}" {title} contact email',
            f'site:linkedin.com "{company_name}" {title} email',
        ]

        for query in queries:
            try:
                results = await self.search.search(query, max_results=5)

                for result in results:
                    # ``or ''`` guards against keys present with a None value,
                    # which would otherwise abort the whole query via the
                    # except clause below.
                    text = (result.get('title') or '') + ' ' + (result.get('body') or '')

                    # Only proceed if we find an actual, usable email
                    candidates = [
                        email
                        for email in self._extract_emails_from_text(text, domain)
                        if email.lower() not in seen_emails
                        and not self._is_generic_email(email.split('@')[0])
                    ]
                    if not candidates:
                        continue

                    # The name is a property of the text, not of each email,
                    # so it is extracted once per result.
                    name = self._extract_name_from_text(text, company_name)
                    if not name:
                        continue

                    # Prefer the address that actually mentions the person.
                    name_parts = [part for part in name.lower().split() if len(part) > 1]
                    email = next(
                        (
                            candidate
                            for candidate in candidates
                            if any(part in candidate.split('@')[0].lower() for part in name_parts)
                        ),
                        candidates[0],
                    )

                    contacts.append(Contact(
                        id=str(uuid.uuid4()),
                        name=name,
                        email=email,
                        title=title,
                        prospect_id=""
                    ))
                    seen_emails.add(email.lower())
                    print(f"[CONTACT FINDER] ✓ FOUND: {name} ({title}) - {email}")

                    if len(contacts) >= max_needed:
                        return contacts

            except Exception as e:
                # Best-effort: a failing query must not stop the remaining ones.
                logger.debug(f"Search error: {str(e)}")
                continue

    return contacts
326
 
327
+ def _extract_emails_from_text(self, text: str, domain: str) -> List[str]:
328
+ """Extract email addresses from text, prioritizing company domain"""
329
+ if not text:
330
+ return []
331
+
332
+ # Find all emails
333
+ email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
334
+ all_emails = re.findall(email_pattern, text, re.IGNORECASE)
335
+
336
+ # Prioritize company domain emails
337
+ company_emails = [e for e in all_emails if domain.lower() in e.lower()]
338
+
339
+ # Filter out junk
340
+ filtered = []
341
+ ignore_patterns = ['example.com', 'domain.com', 'email.com', 'test.com', 'sample.com',
342
+ 'noreply', 'no-reply', 'donotreply', 'unsubscribe', 'privacy',
343
+ 'support@', 'info@', 'contact@', 'hello@', 'sales@', 'help@']
344
+
345
+ for email in company_emails:
346
+ if not any(pattern in email.lower() for pattern in ignore_patterns):
347
+ filtered.append(email.lower())
348
 
349
+ return list(set(filtered))
350
+
351
+ def _extract_name_near_email(self, text: str, email: str, target_titles: List[str]) -> tuple:
352
+ """Extract name that appears near an email address"""
353
+ if not text or not email:
354
+ return None, None
355
+
356
+ # Find context around email (200 chars before and after)
357
+ email_pos = text.lower().find(email.lower())
358
+ if email_pos == -1:
359
+ return None, None
360
+
361
+ start = max(0, email_pos - 200)
362
+ end = min(len(text), email_pos + len(email) + 200)
363
+ context = text[start:end]
364
+
365
+ # Look for name patterns in context
366
+ name = None
367
+ title = None
368
+
369
+ # Try to find name-title patterns
370
  for pattern in self.name_patterns:
371
+ match = re.search(pattern, context)
372
+ if match:
373
+ potential_name = match.group(1).strip()
374
+ if self._is_valid_name(potential_name):
375
+ name = potential_name
376
+ if len(match.groups()) > 1:
377
+ title = match.group(2).strip()
378
+ break
379
+
380
+ # If no name found, try simpler extraction
381
+ if not name:
382
+ # Look for capitalized name-like words near email
383
+ words = context.split()
384
+ for i, word in enumerate(words):
385
+ if word and word[0].isupper() and len(word) > 2:
386
+ if i + 1 < len(words) and words[i+1] and words[i+1][0].isupper():
387
+ potential_name = f"{word} {words[i+1]}"
388
+ if self._is_valid_name(potential_name):
389
+ name = potential_name
390
+ break
391
+
392
+ return name, title
393
 
394
+ def _extract_name_from_text(self, text: str, company_name: str) -> Optional[str]:
395
+ """Extract a person's name from text"""
396
+ for pattern in self.name_patterns:
397
+ match = re.search(pattern, text)
398
+ if match:
399
+ name = match.group(1).strip()
400
+ if self._is_valid_name(name) and company_name.lower() not in name.lower():
401
+ return name
402
  return None
403
 
404
  def _is_valid_name(self, name: str) -> bool:
 
434
 
435
  return True
436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  def _is_generic_email(self, prefix: str) -> bool:
438
  """Check if email prefix is generic (info, contact, etc.)"""
439