minimal-lm-arena / ui /formatting.py
Rachel Kim
fixed dark mode issue
3ee6606
raw
history blame
7.26 kB
import markdown
from pygments.formatters import HtmlFormatter
import re
import hashlib
from latex2mathml.converter import convert
def format_latex(text):
    r"""
    Convert LaTeX formulas embedded in a Markdown string to MathML.

    Handles:
    - Inline math: $...$ or \( ... \)
    - Display math: $$...$$ or \[ ... \]

    `aligned` environments are rewritten to `array`, and display formulas
    are wrapped in <span style="display:block;"> for spacing without
    breaking Markdown lists.

    Fenced code blocks (``` ... ```) and single-line inline code spans
    (`...`) that the scanner reaches are passed through unchanged, so shell
    variables such as $HOME inside code are not mistaken for math.
    A formula that is empty or fails to convert is left exactly as written,
    delimiters included.

    Args:
        text (str): Markdown string with LaTeX.

    Returns:
        str: The input with each converted formula replaced by MathML.
    """
    # Code alternatives come first so that, at any given position, a code
    # span wins over a math delimiter.
    pattern = (
        r'(```.*?```'        # fenced code block
        r'|`[^`\n]+`'        # inline code span on a single line
        r'|\$\$.*?\$\$'      # display math $$...$$
        r'|\\\[.*?\\\]'      # display math \[...\]
        r'|\\\(.*?\\\)'      # inline math \(...\)
        r'|\$.*?\$)'         # inline math $...$
    )
    # (opening, closing, is_display); '$$' must be tested before '$'.
    delimiters = (
        ('$$', '$$', True),
        (r'\[', r'\]', True),
        (r'\(', r'\)', False),
        ('$', '$', False),
    )

    def replace_latex(match):
        original = match.group()

        # Code spans are matched only so they can be skipped.
        if original.startswith('`'):
            return original

        # Strip whichever delimiter pair encloses the formula.
        body = original
        is_display = False
        for opening, closing, display in delimiters:
            if original.startswith(opening) and original.endswith(closing):
                body = original[len(opening):-len(closing)]
                is_display = display
                break
        body = body.strip()

        # An empty formula (e.g. a stray "$$") carries no math; keep the text.
        if not body:
            return original

        # Replace aligned -> array
        body = body.replace(r'\begin{aligned}', r'\begin{array}{l}')
        body = body.replace(r'\end{aligned}', r'\end{array}')

        # Convert to MathML. The converter is third-party and may raise a
        # variety of errors on malformed input, so this is a deliberate
        # best-effort boundary: warn and fall back to the untouched source.
        try:
            mathml = convert(body)
        except Exception as e:
            print(f"Warning: failed to convert LaTeX: {body}\nError: {e}")
            return original

        if is_display:
            # Use span with display:block to preserve Markdown list numbering
            mathml = f'<span style="display:block; margin:10px 0;">{mathml}</span>'
        return mathml

    # DOTALL lets formulas and fenced code blocks span several lines.
    return re.sub(pattern, replace_latex, text, flags=re.DOTALL)
def format_history(history):
    """Render the conversation history as an HTML fragment.

    Every turn contributes the HTML-escaped user message followed by the two
    assistant responses laid out side by side. Each response is passed
    through ``format_latex`` and then rendered as Markdown (fenced code,
    tables and codehilite extensions).

    Args:
        history: Iterable of turn mappings. Each turn is read through the
            keys ``"user"``, ``"highlight"``, ``"response_a"`` and
            ``"response_b"``. Recognised highlight values are ``"left"``,
            ``"right"``, ``"random_left"`` and ``"random_right"``; any other
            value leaves both panels in the neutral style.

    Returns:
        str: The assembled HTML, wrapped in a single outer ``<div>``.
    """

    def render_response(raw_text):
        # LaTeX is turned into MathML before Markdown sees the text.
        return markdown.markdown(
            format_latex(raw_text),
            extensions=['fenced_code', 'tables', 'codehilite'],
            extension_configs={'codehilite': {'guess_lang': True, 'linenums': False}},
        )

    def render_panel(label, border, opacity, body):
        # One response column; the A and B panels differ only in these four values.
        return (
            f'<div style="flex: 1; background-color: var(--background-fill-secondary); padding: 15px; border-radius: 5px; border: {border}; opacity: {opacity}; display: flex; flex-direction: column;">\n'
            f'<div style="margin-bottom: 8px;"><strong>Assistant {label}</strong></div>\n'
            f'<div class="response-content" style="line-height: 1.6; overflow-y: auto; flex: 1; max-height: 400px;">{body}</div>\n'
            '</div>\n'
        )

    # Generate Pygments CSS for syntax highlighting
    pygments_css = HtmlFormatter(style='monokai').get_style_defs('.codehilite')

    # Static rules for rendered response content (code, tables).
    content_css = "\n".join((
        ".response-content pre {",
        "background-color: #272822;",
        "padding: 12px;",
        "border-radius: 4px;",
        "overflow-x: auto;",
        "margin: 10px 0;",
        "}",
        ".response-content code {",
        "background-color: #f0f0f0;",
        "color: #c7254e;",
        "padding: 2px 6px;",
        "border-radius: 3px;",
        "font-family: 'Courier New', monospace;",
        "font-size: 0.9em;",
        "white-space: pre-wrap;",
        "word-wrap: break-word;",
        "}",
        ".response-content pre code {",
        "background-color: transparent;",
        "color: inherit;",
        "padding: 0;",
        "}",
        ".response-content table {",
        "border-collapse: collapse;",
        "width: 100%;",
        "margin: 10px 0;",
        "}",
        ".response-content th, .response-content td {",
        "border: 1px solid #ddd;",
        "padding: 8px;",
        "text-align: left;",
        "}",
        ".response-content th {",
        "background-color: #f0f0f0;",
        "}",
    ))

    # Collect fragments and join once at the end.
    pieces = [
        "\n<div style=\"font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, sans-serif;\">\n"
        "<style>\n"
        "/* Pygments syntax highlighting styles */\n"
        f"{pygments_css}\n"
        f"{content_css}\n"
        "</style>"
    ]

    for turn in history:
        # User message - escape HTML but preserve line breaks
        user_msg = turn["user"]
        for raw, escaped in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"), ("\n", "<br>")):
            user_msg = user_msg.replace(raw, escaped)
        pieces.append(
            f'<div style="background-color: var(--color-accent-soft); padding: 10px; margin: 10px 0; border-radius: 5px;"><strong>You:</strong> {user_msg}</div>'
        )

        # The chosen side gets an accent border; the other side is dimmed.
        # Green marks an explicit choice, blue a random one.
        choice = turn["highlight"]
        accent = "3px solid #4CAF50" if choice in ("left", "right") else "3px solid #4C50AF"
        border_a = border_b = "2px solid var(--block-border-color)"
        opacity_a = opacity_b = "1.0"
        if choice in ("left", "random_left"):
            border_a, opacity_b = accent, "0.3"
        elif choice in ("right", "random_right"):
            border_b, opacity_a = accent, "0.3"

        body_a = render_response(turn["response_a"])
        body_b = render_response(turn["response_b"])

        # Responses side by side
        pieces.append(
            '\n<div style="display: flex; gap: 10px; margin: 10px 0; align-items: stretch;">\n'
            + render_panel("A", border_a, opacity_a, body_a)
            + render_panel("B", border_b, opacity_b, body_b)
            + '</div>\n'
        )

        # Tell the user which response carries forward when the pick was random.
        if choice in ("random_left", "random_right"):
            kept = "A" if choice == "random_left" else "B"
            pieces.append(
                f'<div><b>Note:</b> you did not specify a preference for either assistant, so in subsequent turns, Assistant {kept} will be used as context.</div>'
            )

    pieces.append('</div>')
    return "".join(pieces)