Update app.py
app.py CHANGED
@@ -1,5 +1,4 @@
 import json
-import re
 import os
 import spacy
 from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
@@ -20,6 +19,9 @@ qg_model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qa-qg-hl")
 qg_tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qa-qg-hl", use_fast=True)
 qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=qg_tokenizer)
 
+# Global variable to accumulate Q&A
+batch_data = []
+
 def extract_paragraph_facts(raw_text):
     return [p.strip() for p in raw_text.strip().split("\n\n") if p.strip()]
 
@@ -61,28 +63,31 @@ def generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k,
 
 def generate_qna_all(input_text, selected_fact, selected_np, min_len, max_len, temperature, top_k, top_p):
     facts = extract_paragraph_facts(input_text)
-
+    global batch_data  # Access global batch_data
 
     if selected_fact:
         noun_phrase = selected_np if selected_np else auto_highlight_noun_phrase(selected_fact)
         result = generate_single_qna(selected_fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
-
+        batch_data.append(result)
     else:
         for fact in facts:
             noun_phrase = auto_highlight_noun_phrase(fact)
             result = generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
-
+            batch_data.append(result)
 
-    return json.dumps(
+    return json.dumps(batch_data, indent=2, ensure_ascii=False)
 
-def save_json_to_dataset(json_str):
+def save_json_to_dataset():
     try:
+        if not batch_data:
+            return "❌ No data to save. Generate some Q&A first."
+
         hf_token = os.environ.get("QandA_Generator")
         if not hf_token:
             return "❌ HF_TOKEN not found in environment."
 
         repo_id = "UniversityAIChatbot/University_Inquiries_AI_Chatbot"
-        target_file = "
+        target_file = "database.json"  # Or change to database.json if needed
         local_dir = "hf_repo"
 
         repo = Repository(
@@ -102,19 +107,20 @@ def save_json_to_dataset(json_str):
         else:
             existing_data = []
 
-        new_data = json.loads(json_str)
-
         now = datetime.now()
-        for entry in new_data:
+        for entry in batch_data:
             entry["month"] = now.strftime("%B")
             entry["year"] = now.year
 
-        updated_data = existing_data + new_data
+        updated_data = existing_data + batch_data
 
         with open(full_path, "w", encoding="utf-8") as f:
             json.dump(updated_data, f, indent=2, ensure_ascii=False)
 
-        repo.push_to_hub(commit_message="📥 Add new Q&A to
+        repo.push_to_hub(commit_message="📥 Add new Q&A to database.json")
+
+        # Reset the batch_data after pushing
+        batch_data = []
 
         return "✅ Data with timestamp successfully pushed to Space!"
     except Exception as e:
@@ -163,7 +169,7 @@ def main():
             lines=14,
             label="Q&A JSON",
             interactive=True,
-            placeholder='{\n"question": "Your question?",\n"answer": "Your answer."\n}'
+            placeholder='[\n{\n"question": "Your question?",\n"answer": "Your answer."\n}\n]'
         )
 
         with gr.Row():
@@ -177,7 +183,7 @@ def main():
         )
 
         send_status = gr.Textbox(label="Save Status", interactive=False)
-        send_btn.click(fn=save_json_to_dataset, inputs=
+        send_btn.click(fn=save_json_to_dataset, inputs=None, outputs=send_status)
 
     demo.launch()
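A note on the committed code: save_json_to_dataset() reads batch_data at the top (if not batch_data:) and later rebinds it (batch_data = []) without a global declaration, unlike generate_qna_all(). In Python, any assignment to a name inside a function makes that name local for the whole function body, so the initial read raises UnboundLocalError instead of seeing the module-level list. A minimal sketch of the pitfall and the likely intended fix follows; the names match the diff, but the fix itself is not part of this commit:

batch_data = []

def save_broken():
    # UnboundLocalError: the assignment below makes batch_data local to the
    # entire function, so this read never reaches the module-level list.
    if not batch_data:
        return "no data"
    batch_data = []

def save_fixed():
    global batch_data  # declare intent to rebind the module-level accumulator
    if not batch_data:
        return "no data"
    batch_data = []  # safe: resets the global list after a push
    return "saved"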
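For readers following the push logic, here is a self-contained sketch of the clone, merge, timestamp, and push flow the diff relies on. It assumes huggingface_hub's Repository class (which the repo = Repository(...) call suggests; it is deprecated in recent huggingface_hub releases in favor of HfApi/upload_file). The repo id, token env var, and file name are taken from the diff, while the exact keyword arguments (use_auth_token vs. token) vary by library version, so treat this as an illustrative sketch rather than the Space's exact code:

import json
import os
from datetime import datetime

from huggingface_hub import Repository  # deprecated but still available

def push_batch(batch_data):
    # Token is read from the env var name used in the diff.
    hf_token = os.environ.get("QandA_Generator")
    if not hf_token or not batch_data:
        return "nothing to push"

    # Clone (or reuse) a local checkout of the target repo.
    repo = Repository(
        local_dir="hf_repo",
        clone_from="UniversityAIChatbot/University_Inquiries_AI_Chatbot",
        use_auth_token=hf_token,  # newer versions use token=...
    )

    # Merge the new entries into the existing JSON file, if any.
    full_path = os.path.join("hf_repo", "database.json")
    existing = []
    if os.path.exists(full_path):
        with open(full_path, encoding="utf-8") as f:
            existing = json.load(f)

    # Timestamp each new entry, as the diff does.
    now = datetime.now()
    for entry in batch_data:
        entry["month"] = now.strftime("%B")
        entry["year"] = now.year

    with open(full_path, "w", encoding="utf-8") as f:
        json.dump(existing + batch_data, f, indent=2, ensure_ascii=False)

    # Commit and push the updated file back to the Hub.
    repo.push_to_hub(commit_message="Add new Q&A to database.json")
    return "pushed"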