Update app.py
app.py CHANGED
@@ -1,5 +1,4 @@
 import json
-import re
 import os
 import spacy
 from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
@@ -20,6 +19,9 @@ qg_model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qa-qg-hl")
 qg_tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qa-qg-hl", use_fast=True)
 qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=qg_tokenizer)
 
+# Global variable to accumulate Q&A
+batch_data = []
+
 def extract_paragraph_facts(raw_text):
     return [p.strip() for p in raw_text.strip().split("\n\n") if p.strip()]
 
@@ -61,28 +63,31 @@ def generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k,
 
 def generate_qna_all(input_text, selected_fact, selected_np, min_len, max_len, temperature, top_k, top_p):
     facts = extract_paragraph_facts(input_text)
-
+    global batch_data  # Access global batch_data
 
     if selected_fact:
         noun_phrase = selected_np if selected_np else auto_highlight_noun_phrase(selected_fact)
         result = generate_single_qna(selected_fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
-
+        batch_data.append(result)
     else:
         for fact in facts:
             noun_phrase = auto_highlight_noun_phrase(fact)
             result = generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
-
+            batch_data.append(result)
 
-    return json.dumps(
+    return json.dumps(batch_data, indent=2, ensure_ascii=False)
 
-def save_json_to_dataset(json_str):
+def save_json_to_dataset():
     try:
+        if not batch_data:
+            return "❌ No data to save. Generate some Q&A first."
+
         hf_token = os.environ.get("QandA_Generator")
         if not hf_token:
             return "❌ HF_TOKEN not found in environment."
 
         repo_id = "UniversityAIChatbot/University_Inquiries_AI_Chatbot"
-        target_file = "
+        target_file = "database.json"  # Or change to database.json if needed
         local_dir = "hf_repo"
 
         repo = Repository(
@@ -102,19 +107,20 @@ def save_json_to_dataset(json_str):
         else:
             existing_data = []
 
-        new_data = json.loads(json_str)
-
         now = datetime.now()
-        for entry in new_data:
+        for entry in batch_data:
             entry["month"] = now.strftime("%B")
             entry["year"] = now.year
 
-        updated_data = existing_data + new_data
+        updated_data = existing_data + batch_data
 
         with open(full_path, "w", encoding="utf-8") as f:
             json.dump(updated_data, f, indent=2, ensure_ascii=False)
 
-        repo.push_to_hub(commit_message="📥 Add new Q&A to
+        repo.push_to_hub(commit_message="📥 Add new Q&A to database.json")
+
+        # Reset the batch_data after pushing
+        batch_data = []
 
         return "✅ Data with timestamp successfully pushed to Space!"
     except Exception as e:
@@ -163,7 +169,7 @@ def main():
             lines=14,
             label="Q&A JSON",
             interactive=True,
-            placeholder='{\n"question": "Your question?",\n"answer": "Your answer."\n}'
+            placeholder='[\n{\n"question": "Your question?",\n"answer": "Your answer."\n}\n]'
         )
 
         with gr.Row():
@@ -177,7 +183,7 @@ def main():
         )
 
         send_status = gr.Textbox(label="Save Status", interactive=False)
-        send_btn.click(fn=save_json_to_dataset, inputs=
+        send_btn.click(fn=save_json_to_dataset, inputs=None, outputs=send_status)
 
     demo.launch()
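A note on the committed code: save_json_to_dataset() reads batch_data at the top (if not batch_data:) and later rebinds it (batch_data = []) without a global declaration, unlike generate_qna_all(). In Python, any assignment to a name inside a function makes that name local for the whole function body, so the initial read raises UnboundLocalError instead of seeing the module-level list. A minimal sketch of the pitfall and the likely intended fix follows; the names match the diff, but the fix itself is not part of this commit:

batch_data = []

def save_broken():
    # UnboundLocalError: the assignment below makes batch_data local to the
    # entire function, so this read never reaches the module-level list.
    if not batch_data:
        return "no data"
    batch_data = []

def save_fixed():
    global batch_data  # declare intent to rebind the module-level accumulator
    if not batch_data:
        return "no data"
    batch_data = []  # safe: resets the global list after a push
    return "saved"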
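For readers following the push logic, here is a self-contained sketch of the clone, merge, timestamp, and push flow the diff relies on. It assumes huggingface_hub's Repository class (which the repo = Repository(...) call suggests; it is deprecated in recent huggingface_hub releases in favor of HfApi/upload_file). The repo id, token env var, and file name are taken from the diff, while the exact keyword arguments (use_auth_token vs. token) vary by library version, so treat this as an illustrative sketch rather than the Space's exact code:

import json
import os
from datetime import datetime

from huggingface_hub import Repository  # deprecated but still available

def push_batch(batch_data):
    # Token is read from the env var name used in the diff.
    hf_token = os.environ.get("QandA_Generator")
    if not hf_token or not batch_data:
        return "nothing to push"

    # Clone (or reuse) a local checkout of the target repo.
    repo = Repository(
        local_dir="hf_repo",
        clone_from="UniversityAIChatbot/University_Inquiries_AI_Chatbot",
        use_auth_token=hf_token,  # newer versions use token=...
    )

    # Merge the new entries into the existing JSON file, if any.
    full_path = os.path.join("hf_repo", "database.json")
    existing = []
    if os.path.exists(full_path):
        with open(full_path, encoding="utf-8") as f:
            existing = json.load(f)

    # Timestamp each new entry, as the diff does.
    now = datetime.now()
    for entry in batch_data:
        entry["month"] = now.strftime("%B")
        entry["year"] = now.year

    with open(full_path, "w", encoding="utf-8") as f:
        json.dump(existing + batch_data, f, indent=2, ensure_ascii=False)

    # Commit and push the updated file back to the Hub.
    repo.push_to_hub(commit_message="Add new Q&A to database.json")
    return "pushed"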