gary-boon and Claude Opus 4.5 committed · Commit 76b614d · 1 Parent(s): 2bdf299

Fix MistralTokenizer loading and token section boundaries


- Load MistralTokenizer in switch_model() for correct Tekken encoding
- Fix token section boundary to include [/SYSTEM_PROMPT] in the system section (see the sketch below)
- Add attention row endpoint for overlay visualization

Co-Authored-By: Claude Opus 4.5 <[email protected]>
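For reference, a minimal sketch of the boundary estimation these hunks adjust. The `encode_chat` wrapper, the example strings, and the mistral-common calls are assumptions for illustration, not code from this repo:

```python
# Sketch only: encode_chat is a hypothetical stand-in for the wrapper on
# manager.mistral_tokenizer; the mistral-common API usage is assumed,
# not taken from this commit.
from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

tokenizer = MistralTokenizer.v3(is_tekken=True)  # Tekken encoding, per the commit

def encode_chat(system: str, user: str) -> list[int]:
    """Encode a (system, user) pair into prompt token ids."""
    messages = [SystemMessage(content=system)] if system else []
    messages.append(UserMessage(content=user))
    return tokenizer.encode_chat_completion(
        ChatCompletionRequest(messages=messages)
    ).tokens

prompt = "Explain attention overlays."
full_tokens = encode_chat("You are terse.", prompt)
no_system_tokens = encode_chat("", prompt)

# Length difference = tokens the system prompt contributes; +1 pulls the
# closing [/SYSTEM_PROMPT] tag into the system section (the off-by-one
# fix in the diff below).
system_prompt_end = len(full_tokens) - len(no_system_tokens) + 1
system_prompt_end = max(0, min(system_prompt_end, len(full_tokens)))  # clamp
print(f"system section: tokens [0, {system_prompt_end})")
```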

Files changed (1): backend/model_service.py (+6 −3)
backend/model_service.py CHANGED
@@ -2198,10 +2198,12 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
         # For Devstral, try encoding with empty system to estimate boundary
         try:
             no_system_tokens = manager.mistral_tokenizer.encode_chat("", prompt)
-            system_prompt_end = prompt_length - len(no_system_tokens)
+            # The difference gives us system tokens, but we need to add 1 to include
+            # the closing [/SYSTEM_PROMPT] tag in the system prompt section
+            system_prompt_end = prompt_length - len(no_system_tokens) + 1
             # Ensure non-negative and within bounds
             system_prompt_end = max(0, min(system_prompt_end, prompt_length))
-            logger.info(f"Estimated system prompt boundary: {system_prompt_end} tokens")
+            logger.info(f"Estimated system prompt boundary: {system_prompt_end} tokens (includes closing tag)")
         except Exception as e:
             logger.warning(f"Could not estimate system prompt boundary: {e}")
             system_prompt_end = 0

@@ -2862,7 +2864,8 @@ async def analyze_research_attention_stream(request: Dict[str, Any], authenticat
         if manager.model_id == "devstral-small" and manager.mistral_tokenizer is not None:
             try:
                 no_system_tokens = manager.mistral_tokenizer.encode_chat("", prompt)
-                system_prompt_end = prompt_length - len(no_system_tokens)
+                # Add 1 to include the closing [/SYSTEM_PROMPT] tag in system section
+                system_prompt_end = prompt_length - len(no_system_tokens) + 1
                 system_prompt_end = max(0, min(system_prompt_end, prompt_length))
             except Exception:
                 system_prompt_end = 0
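Downstream, this boundary is presumably what partitions prompt tokens into sections for the overlay. A sketch of that consumption, with the function name and section labels assumed rather than taken from the diff:

```python
# Sketch: how a [/SYSTEM_PROMPT]-inclusive boundary might feed the overlay.
# token_sections and the "system"/"user" labels are illustrative assumptions.
def token_sections(prompt_length: int, system_prompt_end: int) -> dict[str, range]:
    """Partition prompt token indices into system and user sections.

    system_prompt_end is exclusive, and (after this commit) already
    places the closing [/SYSTEM_PROMPT] tag inside the system section.
    """
    return {
        "system": range(0, system_prompt_end),
        "user": range(system_prompt_end, prompt_length),
    }

sections = token_sections(prompt_length=12, system_prompt_end=5)
assert 4 in sections["system"]  # the closing tag's index stays in system
assert 5 in sections["user"]    # first token of the user section
```

The max/min clamp kept in both hunks guards the estimate: if the empty-system encoding were unexpectedly longer than the full prompt, the system section collapses to zero instead of going negative.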