Spaces:
Build error
Build error
Commit
·
849819f
1
Parent(s):
498ff0a
upload post processing
Browse files
app.py
CHANGED
|
@@ -25,7 +25,7 @@ model = load_model("QuoQA-NLP/konec-privacy")
|
|
| 25 |
model.eval()
|
| 26 |
|
| 27 |
|
| 28 |
-
default_value = "
|
| 29 |
|
| 30 |
src_text = st.text_area(
|
| 31 |
"검사하고 싶은 문장을 입력하세요.",
|
|
@@ -74,10 +74,10 @@ def yield_df(default_value):
|
|
| 74 |
print(class_decoded)
|
| 75 |
|
| 76 |
label_map = {
|
| 77 |
-
"ADD":
|
| 78 |
"DN": "질환 정보",
|
| 79 |
"DT": "날짜 정보",
|
| 80 |
-
"LC": "
|
| 81 |
"OG": "기관 정보",
|
| 82 |
"PS": "인명/별명 정보",
|
| 83 |
"QT": "수량 정보",
|
|
@@ -85,6 +85,7 @@ def yield_df(default_value):
|
|
| 85 |
"O": "비민감 정보"
|
| 86 |
}
|
| 87 |
|
|
|
|
| 88 |
# pair tokens with prediction
|
| 89 |
tokenized_text = tokenizer.convert_ids_to_tokens(tokenized)
|
| 90 |
list_result = []
|
|
@@ -99,7 +100,6 @@ def yield_df(default_value):
|
|
| 99 |
df = pd.DataFrame(list_result)
|
| 100 |
# remove first and last row
|
| 101 |
df = df.iloc[1:-1]
|
| 102 |
-
st.table(df)
|
| 103 |
return df
|
| 104 |
|
| 105 |
def convert_df(df:pd.DataFrame):
|
|
@@ -113,15 +113,45 @@ def convert_json(df:pd.DataFrame):
|
|
| 113 |
return json_string
|
| 114 |
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
if src_text == "":
|
| 117 |
st.warning("Please **enter text** for translation")
|
| 118 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
st.markdown("### 분류된 단어들")
|
| 120 |
st.header("")
|
| 121 |
cs, c1, c2, c3, cLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
|
| 122 |
|
| 123 |
-
df_result
|
| 124 |
-
|
| 125 |
with c1:
|
| 126 |
#csvbutton = download_button(results, "results.csv", "📥 Download .csv")
|
| 127 |
csvbutton = st.download_button(label="๐ฅ csv๋ก ๋ค์ด๋ก๋", data=convert_df(df_result), file_name= "results.csv", mime='text/csv', key='csv')
|
|
@@ -132,6 +162,8 @@ else:
|
|
| 132 |
#jsonbutton = download_button(results, "results.json", "📥 Download .json")
|
| 133 |
jsonbutton = st.download_button(label="๐ฅ json์ผ๋ก ๋ค์ด๋ก๋", data=convert_json(df_result), file_name= "results.json", mime='application/json', key='json')
|
| 134 |
|
|
|
|
|
|
|
| 135 |
with st.expander("(주) 쿼카에이아이 데모 심사 관련", expanded=True):
|
| 136 |
|
| 137 |
st.write(
|
|
|
|
| 25 |
model.eval()
|
| 26 |
|
| 27 |
|
| 28 |
+
default_value = "์์ง๋, ๋น๋จ ๊ฒ์ฌํ ๊ฑฐ ๊ฒฐ๊ณผ ๋์ค์
จ์ด์."
|
| 29 |
|
| 30 |
src_text = st.text_area(
|
| 31 |
"검사하고 싶은 문장을 입력하세요.",
|
|
|
|
| 74 |
print(class_decoded)
|
| 75 |
|
| 76 |
label_map = {
|
| 77 |
+
"ADD": "주소 정보",
|
| 78 |
"DN": "질환 정보",
|
| 79 |
"DT": "날짜 정보",
|
| 80 |
+
"LC": "장소 정보",
|
| 81 |
"OG": "기관 정보",
|
| 82 |
"PS": "인명/별명 정보",
|
| 83 |
"QT": "수량 정보",
|
|
|
|
| 85 |
"O": "비민감 정보"
|
| 86 |
}
|
| 87 |
|
| 88 |
+
|
| 89 |
# pair tokens with prediction
|
| 90 |
tokenized_text = tokenizer.convert_ids_to_tokens(tokenized)
|
| 91 |
list_result = []
|
|
|
|
| 100 |
df = pd.DataFrame(list_result)
|
| 101 |
# remove first and last row
|
| 102 |
df = df.iloc[1:-1]
|
|
|
|
| 103 |
return df
|
| 104 |
|
| 105 |
def convert_df(df:pd.DataFrame):
|
|
|
|
| 113 |
return json_string
|
| 114 |
|
| 115 |
|
| 116 |
+
|
| 117 |
+
filtering_map = {
|
| 118 |
+
"주소 정보": "[주소]",
|
| 119 |
+
"질환 정보": "[질환]",
|
| 120 |
+
"날짜 정보": "[날짜]",
|
| 121 |
+
"장소 정보": "[장소]",
|
| 122 |
+
"기관 정보": "[기관]",
|
| 123 |
+
"인명/별명 정보": "[이름]",
|
| 124 |
+
"수량 정보": "[수량]",
|
| 125 |
+
"관계 정보": "[관계]",
|
| 126 |
+
"비민감 정보": "[비민감]"
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
if src_text == "":
|
| 130 |
st.warning("Please **enter text** for translation")
|
| 131 |
else:
|
| 132 |
+
df_result = yield_df(src_text)
|
| 133 |
+
st.markdown("### 필터링 된 문장")
|
| 134 |
+
|
| 135 |
+
display_result = ""
|
| 136 |
+
for index, row in df_result.iterrows():
|
| 137 |
+
token_info = row["ํํ์"]
|
| 138 |
+
label_info = row["์์ ๋ผ๋ฒจ"]
|
| 139 |
+
if label_info != "비민감 정보":
|
| 140 |
+
token_info = filtering_map[label_info]
|
| 141 |
+
|
| 142 |
+
if "##" in token_info:
|
| 143 |
+
token_info = token_info.replace("##", "")
|
| 144 |
+
else:
|
| 145 |
+
token_info = " " + token_info
|
| 146 |
+
display_result += token_info
|
| 147 |
+
|
| 148 |
+
st.write(display_result)
|
| 149 |
+
|
| 150 |
st.markdown("### 분류된 단어들")
|
| 151 |
st.header("")
|
| 152 |
cs, c1, c2, c3, cLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
|
| 153 |
|
| 154 |
+
st.table(df_result)
|
|
|
|
| 155 |
with c1:
|
| 156 |
#csvbutton = download_button(results, "results.csv", "📥 Download .csv")
|
| 157 |
csvbutton = st.download_button(label="๐ฅ csv๋ก ๋ค์ด๋ก๋", data=convert_df(df_result), file_name= "results.csv", mime='text/csv', key='csv')
|
|
|
|
| 162 |
#jsonbutton = download_button(results, "results.json", "📥 Download .json")
|
| 163 |
jsonbutton = st.download_button(label="๐ฅ json์ผ๋ก ๋ค์ด๋ก๋", data=convert_json(df_result), file_name= "results.json", mime='application/json', key='json')
|
| 164 |
|
| 165 |
+
|
| 166 |
+
|
| 167 |
with st.expander("(주) 쿼카에이아이 데모 심사 관련", expanded=True):
|
| 168 |
|
| 169 |
st.write(
|