Update app.py.v2
app.py.v2 CHANGED
@@ -1,3 +1,4 @@
+# app.py
 import gradio as gr
 import pandas as pd
 import requests
@@ -10,6 +11,7 @@ import polars as pl
 import warnings
 import traceback
 import json
+import tempfile # Added for creating temporary files
 
 # π€« Let's ignore those pesky warnings, shall we?
 warnings.filterwarnings("ignore")
@@ -18,7 +20,7 @@ warnings.filterwarnings("ignore")
 DATASET_CONFIG = {
     "caselaw": {
         "name": "common-pile/caselaw_access_project", "emoji": "βοΈ",
-        "methods": ["π¨ API (requests)", "
+        "methods": ["π¨ API (requests)", "π§ Dask", "π₯ Croissant"], "is_public": True,
     },
     "prompts": {
         "name": "fka/awesome-chatgpt-prompts", "emoji": "π€",
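
Side note on this hunk: DATASET_CONFIG drives the per-dataset tabs that create_dataset_tab(key) builds at the bottom of the file, so adding access methods here is enough to surface them in the UI. A minimal, self-contained sketch of that pattern follows; the component choices are assumptions, not the app's real layout.

```python
# Illustrative sketch only: a config dict like DATASET_CONFIG driving tab creation.
# The Markdown/Radio components below are placeholders, not the app's actual widgets.
import gradio as gr

DATASET_CONFIG = {
    "caselaw": {
        "name": "common-pile/caselaw_access_project", "emoji": "βοΈ",
        "methods": ["π¨ API (requests)", "π§ Dask", "π₯ Croissant"], "is_public": True,
    },
}

def create_dataset_tab(dataset_key: str):
    config = DATASET_CONFIG[dataset_key]
    with gr.Tab(f"{config['emoji']} {dataset_key}"):
        gr.Markdown(f"Dataset: `{config['name']}`")
        gr.Radio(choices=config["methods"], value=config["methods"][0], label="Access method")

with gr.Blocks() as demo:
    for key in DATASET_CONFIG:
        create_dataset_tab(key)

if __name__ == "__main__":
    demo.launch()
```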
@@ -44,31 +46,41 @@ def get_auth_headers():
     token = get_token()
     return {"Authorization": f"Bearer {token}"} if token else {}
 
+# --- β¨ FIXED: dataframe_to_outputs to use temporary files ---
 def dataframe_to_outputs(df: pd.DataFrame):
+    """
+    π Takes a DataFrame and transforms it into various formats.
+    Now uses temporary files for maximum Gradio compatibility.
+    """
     if df.empty:
         return "No results found. π€·", None, None, "No results to copy."
+
     df_str = df.astype(str)
     markdown_output = df_str.to_markdown(index=False)
-
-
-
-
-
-
+
+    # Create a temporary CSV file
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', encoding='utf-8') as tmp_csv:
+        df.to_csv(tmp_csv.name, index=False)
+        csv_path = tmp_csv.name
+
+    # Create a temporary XLSX file
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp_xlsx:
+        df.to_excel(tmp_xlsx.name, index=False, engine='openpyxl')
+        xlsx_path = tmp_xlsx.name
+
     tab_delimited_output = df.to_csv(sep='\t', index=False)
+
     return (
         markdown_output,
-
-
+        csv_path,
+        xlsx_path,
         tab_delimited_output,
     )
 
-# --- β¨ NEW Enhanced Error Handler with Debug Logging ---
 def handle_error(e: Exception, request=None, response=None):
     """
     π± Oh no! An error! This function now creates a detailed debug log.
     """
-    # Basic error info
     error_message = f"π¨ An error occurred: {str(e)}\n"
     auth_tip = "π For gated datasets, did you log in? Try `huggingface-cli login` in your terminal."
     full_trace = traceback.format_exc()
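
Why the temp files: Gradio file-output components (e.g. gr.File) expect a file path they can serve, so writing the CSV/XLSX to NamedTemporaryFile(delete=False) and returning the paths is the portable fix. Below is a standalone sketch of the same pattern, assuming pandas and openpyxl are installed; note that reopening a NamedTemporaryFile by name while it is still open works on Linux (where Spaces run) but not on Windows.

```python
# Minimal sketch of the temp-file export pattern used by dataframe_to_outputs.
# Assumes pandas + openpyxl; the file names are whatever tempfile picks.
import tempfile
import pandas as pd

def export_to_temp_files(df: pd.DataFrame) -> tuple[str, str]:
    """Write df to temporary CSV and XLSX files and return their paths."""
    with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".csv", encoding="utf-8") as tmp_csv:
        df.to_csv(tmp_csv.name, index=False)
        csv_path = tmp_csv.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp_xlsx:
        df.to_excel(tmp_xlsx.name, index=False, engine="openpyxl")
        xlsx_path = tmp_xlsx.name
    return csv_path, xlsx_path

if __name__ == "__main__":
    csv_path, xlsx_path = export_to_temp_files(pd.DataFrame({"a": [1, 2], "b": ["x", "y"]}))
    print(csv_path, xlsx_path)  # paths a file-output component can hand to the browser
```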
@@ -76,41 +88,20 @@ def handle_error(e: Exception, request=None, response=None):
     if "401" in str(e) or "Gated" in str(e):
         error_message += auth_tip
 
-
-    debug_log = f"""
---- π DEBUG LOG ---
-Traceback:
-{full_trace}
-
-Exception Type: {type(e).__name__}
-Exception Details: {e}
-"""
+    debug_log = f"""--- π DEBUG LOG ---\nTraceback:\n{full_trace}\n\nException Type: {type(e).__name__}\nException Details: {e}\n"""
     if request:
-        debug_log += f"""
---- REQUEST ---
-Method: {request.method}
-URL: {request.url}
-Headers: {json.dumps(dict(request.headers), indent=2)}
-"""
+        debug_log += f"""\n--- REQUEST ---\nMethod: {request.method}\nURL: {request.url}\nHeaders: {json.dumps(dict(request.headers), indent=2)}\n"""
     if response is not None:
         try:
-
-            response_text = json.dumps(response_json, indent=2)
+            response_text = json.dumps(response.json(), indent=2)
         except json.JSONDecodeError:
             response_text = response.text
-        debug_log += f"""
---- RESPONSE ---
-Status Code: {response.status_code}
-Headers: {json.dumps(dict(response.headers), indent=2)}
-Content:
-{response_text}
-"""
+        debug_log += f"""\n--- RESPONSE ---\nStatus Code: {response.status_code}\nHeaders: {json.dumps(dict(response.headers), indent=2)}\nContent:\n{response_text}\n"""
 
-    # Return a tuple of 9 to match the outputs
     return (
         pd.DataFrame(), gr.Gallery(None), f"### π¨ Error\nAn error occurred. See the debug log below for details.",
         "", None, None, "", f"```python\n# π¨ Error during execution:\n# {e}\n```",
-        gr.Code(value=debug_log, visible=True)
+        gr.Code(value=debug_log, visible=True)
    )
 
 def search_dataframe(df: pd.DataFrame, query: str):
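
Two things this hunk relies on: building the debug log from single-line f-strings with explicit \n keeps function-body indentation out of the log text, and the handler must still return one value per wired output component (nine, per the docstring), with gr.Code(value=..., visible=True) un-hiding the debug log. A rough sketch of that return-shape contract follows; the component names in comments are placeholders.

```python
# Rough sketch of the "one return value per output component" contract that
# handle_error satisfies. The nine-slot layout mirrors the app; names are placeholders.
import gradio as gr
import pandas as pd

def run_or_report(fn):
    try:
        return fn()
    except Exception as e:
        # Nine values, matching the nine outputs wired in fetch_button.click(...)
        return (
            pd.DataFrame(),            # df_output
            None,                      # gallery_output
            "### π¨ Error",            # status_output
            "",                        # markdown_output
            None, None,                # csv_output, xlsx_output
            "",                        # copy_output
            f"# error: {e}",           # code_output
            gr.Code(value=f"--- π DEBUG LOG ---\n{e}", visible=True),  # debug_log_output
        )
```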
@@ -129,10 +120,8 @@ def fetch_data(dataset_key: str, access_method: str, query: str):
     """
     π Main mission control. Always yields a tuple of 9 values to match the UI components.
     """
-    # Initialize the state for all 9 output components
     outputs = [pd.DataFrame(), None, "π Ready.", "", None, None, "", "", gr.Code(visible=False)]
-
-    req, res = None, None # To hold request/response for debugging
+    req, res = None, None
     try:
         config = DATASET_CONFIG[dataset_key]
         repo_id = config["name"]
@@ -158,8 +147,7 @@ def fetch_data(dataset_key: str, access_method: str, query: str):
 
             res = requests.get(url, headers=headers)
             req = res.request
-            res.raise_for_status()
-
+            res.raise_for_status()
             data = res.json()
 
             if not data.get('rows'):
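
Keeping res.raise_for_status() right after capturing req = res.request means any HTTP 4xx/5xx raises requests.HTTPError before .json() is attempted, and the request/response pair is presumably what handle_error(e, req, res) formats into the debug log. A small sketch of that flow, assuming the public datasets-server /rows endpoint purely as an example:

```python
# Sketch of the raise_for_status() error path; endpoint and parameters are illustrative.
import requests

url = "https://datasets-server.huggingface.co/rows"
params = {"dataset": "fka/awesome-chatgpt-prompts", "config": "default",
          "split": "train", "offset": 0, "length": 100}

res = requests.get(url, params=params)
req = res.request                      # keep the PreparedRequest for debug logging
try:
    res.raise_for_status()             # turns HTTP 4xx/5xx into requests.HTTPError
    data = res.json()
    print(len(data.get("rows", [])), "rows fetched")
except requests.HTTPError as e:
    # The same objects handle_error(e, request=req, response=res) would log
    print("HTTP error:", res.status_code, req.url, str(e))
```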
@@ -167,7 +155,11 @@ def fetch_data(dataset_key: str, access_method: str, query: str):
                 yield tuple(outputs)
                 break
 
-
+            # --- β¨ FIXED: JSON processing logic ---
+            # Extract the actual data from the 'row' key of each item in the list
+            rows_data = [item['row'] for item in data['rows']]
+            page_df = pd.json_normalize(rows_data)
+
             found_in_page = search_dataframe(page_df, query)
 
             if not found_in_page.empty:
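
The JSON fix reflects the shape of the datasets-server /rows response, where each entry wraps the record in a 'row' key alongside metadata such as 'row_idx'; pulling out item['row'] before pd.json_normalize keeps only the record fields as columns. A self-contained sketch with a mocked payload in that assumed shape:

```python
# Standalone sketch of the fixed row extraction. The payload mimics the
# datasets-server /rows response shape ({"rows": [{"row_idx": ..., "row": {...}}]}).
import pandas as pd

data = {
    "rows": [
        {"row_idx": 0, "row": {"act": "Linux Terminal", "prompt": "Act as a Linux terminal..."}},
        {"row_idx": 1, "row": {"act": "Translator", "prompt": "Act as an English translator..."}},
    ]
}

rows_data = [item["row"] for item in data["rows"]]  # drop the row_idx wrapper
page_df = pd.json_normalize(rows_data)              # nested dicts become dotted columns
print(page_df.columns.tolist())                     # -> ['act', 'prompt']
```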
@@ -189,7 +181,6 @@ def fetch_data(dataset_key: str, access_method: str, query: str):
         yield tuple(outputs)
 
         df = pd.DataFrame()
-        # Simplified for brevity - expand if needed
         if "Pandas" in access_method:
             file_path = f"hf://datasets/{repo_id}/"
             if repo_id == "fka/awesome-chatgpt-prompts": file_path += "prompts.csv"; df = pd.read_csv(file_path)
@@ -248,7 +239,6 @@ def create_dataset_tab(dataset_key: str):
 
         code_output = gr.Code(label="π» Python Code Snippet", language="python")
 
-        # --- β¨ NEW Debug Log UI Component (language parameter removed) ---
         debug_log_output = gr.Code(label="π Debug Log", visible=False)
 
         fetch_button.click(
@@ -257,7 +247,7 @@ def create_dataset_tab(dataset_key: str):
             outputs=[
                 df_output, gallery_output, status_output, markdown_output,
                 csv_output, xlsx_output, copy_output, code_output,
-                debug_log_output
+                debug_log_output
             ]
         )
 
@@ -273,4 +263,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Hugging Face Dataset Explorer") as demo:
         create_dataset_tab(key)
 
 if __name__ == "__main__":
-    demo.launch(debug=True)
+    demo.launch(debug=True)