gary-boon and Claude Opus 4.5 committed · Commit 76b614d · 1 Parent(s): 2bdf299

Fix MistralTokenizer loading and token section boundaries


- Load MistralTokenizer in switch_model() for correct Tekken encoding
- Fix token section boundary to include [/SYSTEM_PROMPT] in the system section (see the sketch below)
- Add attention row endpoint for overlay visualization

Co-Authored-By: Claude Opus 4.5 <[email protected]>
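For reference, a minimal sketch of the boundary estimation these hunks adjust. The `encode_chat` wrapper, the example strings, and the mistral-common calls are assumptions for illustration, not code from this repo:

```python
# Sketch only: encode_chat is a hypothetical stand-in for the wrapper on
# manager.mistral_tokenizer; the mistral-common API usage is assumed,
# not taken from this commit.
from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

tokenizer = MistralTokenizer.v3(is_tekken=True)  # Tekken encoding, per the commit

def encode_chat(system: str, user: str) -> list[int]:
    """Encode a (system, user) pair into prompt token ids."""
    messages = [SystemMessage(content=system)] if system else []
    messages.append(UserMessage(content=user))
    return tokenizer.encode_chat_completion(
        ChatCompletionRequest(messages=messages)
    ).tokens

prompt = "Explain attention overlays."
full_tokens = encode_chat("You are terse.", prompt)
no_system_tokens = encode_chat("", prompt)

# Length difference = tokens the system prompt contributes; +1 pulls the
# closing [/SYSTEM_PROMPT] tag into the system section (the off-by-one
# fix in the diff below).
system_prompt_end = len(full_tokens) - len(no_system_tokens) + 1
system_prompt_end = max(0, min(system_prompt_end, len(full_tokens)))  # clamp
print(f"system section: tokens [0, {system_prompt_end})")
```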

Files changed (1): backend/model_service.py (+6 −3)
backend/model_service.py CHANGED
@@ -2198,10 +2198,12 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
         # For Devstral, try encoding with empty system to estimate boundary
         try:
             no_system_tokens = manager.mistral_tokenizer.encode_chat("", prompt)
-            system_prompt_end = prompt_length - len(no_system_tokens)
+            # The difference gives us system tokens, but we need to add 1 to include
+            # the closing [/SYSTEM_PROMPT] tag in the system prompt section
+            system_prompt_end = prompt_length - len(no_system_tokens) + 1
             # Ensure non-negative and within bounds
             system_prompt_end = max(0, min(system_prompt_end, prompt_length))
-            logger.info(f"Estimated system prompt boundary: {system_prompt_end} tokens")
+            logger.info(f"Estimated system prompt boundary: {system_prompt_end} tokens (includes closing tag)")
         except Exception as e:
             logger.warning(f"Could not estimate system prompt boundary: {e}")
             system_prompt_end = 0

@@ -2862,7 +2864,8 @@ async def analyze_research_attention_stream(request: Dict[str, Any], authenticat
         if manager.model_id == "devstral-small" and manager.mistral_tokenizer is not None:
             try:
                 no_system_tokens = manager.mistral_tokenizer.encode_chat("", prompt)
-                system_prompt_end = prompt_length - len(no_system_tokens)
+                # Add 1 to include the closing [/SYSTEM_PROMPT] tag in system section
+                system_prompt_end = prompt_length - len(no_system_tokens) + 1
                 system_prompt_end = max(0, min(system_prompt_end, prompt_length))
             except Exception:
                 system_prompt_end = 0
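Downstream, this boundary is presumably what partitions prompt tokens into sections for the overlay. A sketch of that consumption, with the function name and section labels assumed rather than taken from the diff:

```python
# Sketch: how a [/SYSTEM_PROMPT]-inclusive boundary might feed the overlay.
# token_sections and the "system"/"user" labels are illustrative assumptions.
def token_sections(prompt_length: int, system_prompt_end: int) -> dict[str, range]:
    """Partition prompt token indices into system and user sections.

    system_prompt_end is exclusive, and (after this commit) already
    places the closing [/SYSTEM_PROMPT] tag inside the system section.
    """
    return {
        "system": range(0, system_prompt_end),
        "user": range(system_prompt_end, prompt_length),
    }

sections = token_sections(prompt_length=12, system_prompt_end=5)
assert 4 in sections["system"]  # the closing tag's index stays in system
assert 5 in sections["user"]    # first token of the user section
```

The max/min clamp kept in both hunks guards the estimate: if the empty-system encoding were unexpectedly longer than the full prompt, the system section collapses to zero instead of going negative.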