Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import openpyxl | |
| import datetime | |
| import tempfile | |
| import os | |
| from openpyxl.utils.dataframe import dataframe_to_rows | |
| import openai | |
| # ===================== OpenAI API ํด๋ผ์ด์ธํธ ์ค์ ===================== | |
| openai.api_key = os.getenv("OPENAI_API_KEY") | |
| def call_api(content, system_message, max_tokens, temperature, top_p): | |
| """ | |
| ChatGPT API ํธ์ถ ํจ์. | |
| """ | |
| response = openai.ChatCompletion.create( | |
| model="gpt-4o-mini", | |
| messages=[ | |
| {"role": "system", "content": system_message}, | |
| {"role": "user", "content": content} | |
| ], | |
| max_tokens=max_tokens, | |
| temperature=temperature, | |
| top_p=top_p | |
| ) | |
| return response.choices[0].message['content'] | |
| def respond_gemini_qna(question, system_message, max_tokens, temperature, top_p, model_id): | |
| """ | |
| Gemini Flash ๋ชจ๋ธ์ ์ด์ฉํด ์ง๋ฌธ(question)์ ๋ํ ๋ต๋ณ์ ๋ฐํํ๋ ํจ์. | |
| """ | |
| try: | |
| import google.generativeai as genai | |
| except ModuleNotFoundError: | |
| return ("์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: 'google-generativeai' ๋ชจ๋์ ์ฐพ์ ์ ์์ต๋๋ค. " | |
| "ํด๊ฒฐ ๋ฐฉ๋ฒ: 'pip install --upgrade google-generativeai' ๋ฅผ ์คํํ์ฌ ์ค์นํด์ฃผ์ธ์.") | |
| gemini_api_key = os.getenv("GEMINI_API_KEY") | |
| if not gemini_api_key: | |
| return "Gemini API ํ ํฐ์ด ํ์ํฉ๋๋ค." | |
| genai.configure(api_key=gemini_api_key) | |
| prompt = f"{system_message}\n\n{question}" | |
| try: | |
| model = genai.GenerativeModel(model_name=model_id) | |
| response = model.generate_content(prompt) | |
| return response.text | |
| except Exception as e: | |
| return f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}" | |
| def respond_o1mini_qna(question, system_message, max_tokens, temperature): | |
| """ | |
| o1-mini ๋ชจ๋ธ์ ์ด์ฉํด ํ ๋ฒ์ ์ง๋ฌธ(question)์ ๋ํ ๋ต๋ณ์ ๋ฐํํ๋ ํจ์. | |
| o1-mini์์๋ 'system' ๋ฉ์์ง๋ฅผ ์ง์ํ์ง ์์ผ๋ฏ๋ก system_message์ question์ ํ๋์ 'user' ๋ฉ์์ง๋ก ํฉ์ณ ์ ๋ฌํฉ๋๋ค. | |
| ๋ํ, o1-mini์์๋ 'max_tokens' ๋์ 'max_completion_tokens'๋ฅผ ์ฌ์ฉํ๋ฉฐ, temperature๋ ๊ณ ์ ๊ฐ 1๋ง ์ง์ํฉ๋๋ค. | |
| """ | |
| openai_token = os.getenv("OPENAI_API_KEY") | |
| if not openai_token: | |
| return "OpenAI API ํ ํฐ์ด ํ์ํฉ๋๋ค." | |
| openai.api_key = openai_token | |
| combined_message = f"{system_message}\n\n{question}" | |
| messages = [{"role": "user", "content": combined_message}] | |
| try: | |
| response = openai.ChatCompletion.create( | |
| model="o1-mini", | |
| messages=messages, | |
| max_completion_tokens=max_tokens, | |
| temperature=1, # ๊ณ ์ ๋ ๊ฐ 1 ์ฌ์ฉ | |
| ) | |
| assistant_message = response.choices[0].message['content'] | |
| return assistant_message | |
| except Exception as e: | |
| return f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}" | |
| # ===================== (1) ๋ฆฌ๋ทฐ์ต์ ๋ถ์: ๋ฏธ์ 1~9 ์ฒ๋ฆฌ ===================== | |
| def analyze_options(uploaded_file, selected_year, llm_model_choice): | |
| """ | |
| ์ ๋ก๋๋ ํ์ผ๋ก๋ถํฐ ๋ฏธ์ 1~9(์ต์ /ํ์ ์ง๊ณ)๋ฅผ ์ํํ๊ณ , | |
| ์์ ์์ ํ์ผ๊ณผ ์ ํ๋ ๋ Top20 ์ต์ ๋ฆฌ์คํธ(์ ์ฒด์ต์ ๋ถ์ ํฌํจ)๋ฅผ ๋ฐํํ๋ค. | |
| """ | |
| if uploaded_file is None: | |
| return None, ["์ ์ฒด์ต์ ๋ถ์"] | |
| try: | |
| df_upload = pd.read_excel(uploaded_file) | |
| except Exception as e: | |
| return None, ["์ ์ฒด์ต์ ๋ถ์"] | |
| template_file = "๋ฆฌ๋ทฐ๋ถ์.ver1.0.xlsx" | |
| if not os.path.exists(template_file): | |
| return None, ["์ ์ฒด์ต์ ๋ถ์"] | |
| try: | |
| wb = openpyxl.load_workbook(template_file) | |
| except Exception as e: | |
| return None, ["์ ์ฒด์ต์ ๋ถ์"] | |
| # (๋ฏธ์ 1) ์๋ณธ๋ฐ์ดํฐ ์ํธ ์์ฑ | |
| if "์๋ณธ๋ฐ์ดํฐ" in wb.sheetnames: | |
| ws_source = wb["์๋ณธ๋ฐ์ดํฐ"] | |
| wb.remove(ws_source) | |
| ws_source = wb.create_sheet("์๋ณธ๋ฐ์ดํฐ") | |
| else: | |
| ws_source = wb.create_sheet("์๋ณธ๋ฐ์ดํฐ") | |
| for r_idx, row in enumerate(dataframe_to_rows(df_upload, index=False, header=True), start=1): | |
| for c_idx, value in enumerate(row, start=1): | |
| ws_source.cell(row=r_idx, column=c_idx, value=value) | |
| # ๋ฆฌ๋ทฐ ๋ ์ง ์ฒ๋ฆฌ | |
| try: | |
| df_upload['๋ฆฌ๋ทฐ๋ ์ง'] = pd.to_datetime(df_upload.iloc[:, 1], errors='coerce') | |
| df_upload['๋ฆฌ๋ทฐ๋ ์ง'] = df_upload['๋ฆฌ๋ทฐ๋ ์ง'].apply( | |
| lambda d: d.replace(tzinfo=None) if pd.notnull(d) and d.tzinfo is not None else d | |
| ) | |
| df_valid = df_upload.dropna(subset=['๋ฆฌ๋ทฐ๋ ์ง']).copy() | |
| except Exception as e: | |
| return None, ["์ ์ฒด์ต์ ๋ถ์"] | |
| now = datetime.datetime.now() | |
| current_year = now.year | |
| start_year_val = current_year - 2 | |
| end_year_val = current_year | |
| # ๋์๋ณด๋๋ฐ์ดํฐ ์ํธ | |
| if "๋์๋ณด๋๋ฐ์ดํฐ" in wb.sheetnames: | |
| ws_dashboard = wb["๋์๋ณด๋๋ฐ์ดํฐ"] | |
| else: | |
| ws_dashboard = wb.create_sheet("๋์๋ณด๋๋ฐ์ดํฐ") | |
| # (๋ฏธ์ 2) ์ต๊ทผ 3๋ '๋ ์' ์ถ์ด | |
| ws_dashboard["C2"] = str(current_year)[-2:] | |
| row_idx = 5 | |
| for year in range(start_year_val, end_year_val + 1): | |
| for month in range(1, 13): | |
| count = df_valid[(df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.year == year) & (df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.month == month)].shape[0] | |
| ws_dashboard.cell(row=row_idx, column=3, value=count) | |
| row_idx += 1 | |
| # (๋ฏธ์ 3) ์ต๊ทผ 3๋ '๋ ๋' ์ถ์ด | |
| ws_dashboard["E2"] = str(current_year)[-2:] | |
| row_year = 5 | |
| for y in [current_year, current_year - 1, current_year - 2]: | |
| count = df_valid[df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.year == y].shape[0] | |
| ws_dashboard.cell(row=row_year, column=6, value=count) | |
| row_year += 1 | |
| # (๋ฏธ์ 4) ์ ํ๋ ๋ ๋ '์๋ณ' ์ถ์ด | |
| try: | |
| selected_year_int = int("20" + selected_year[:2]) | |
| except Exception as e: | |
| return None, ["์ ์ฒด์ต์ ๋ถ์"] | |
| ws_dashboard["I2"] = selected_year[:2] | |
| for month in range(1, 13): | |
| count = df_valid[(df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.year == selected_year_int) & (df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.month == month)].shape[0] | |
| ws_dashboard.cell(row=4 + month, column=9, value=count) | |
| # (๋ฏธ์ 5) ์ ํ๋ ๋ ๋ '์์ผ' ์ถ์ด | |
| start_date = datetime.date(selected_year_int, 1, 1) | |
| end_date = datetime.date(selected_year_int, 12, 31) | |
| df_selected = df_valid[df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.year == selected_year_int].copy() | |
| df_selected['๋ฆฌ๋ทฐ๋ ์ง_์ผ'] = df_selected['๋ฆฌ๋ทฐ๋ ์ง'].dt.date | |
| daily_counts = df_selected.groupby('๋ฆฌ๋ทฐ๋ ์ง_์ผ').size().to_dict() | |
| row_day = 5 | |
| current_day = start_date | |
| while current_day <= end_date: | |
| ws_dashboard.cell(row=row_day, column=12, value=daily_counts.get(current_day, 0)) | |
| row_day += 1 | |
| current_day += datetime.timedelta(days=1) | |
| # (๋ฏธ์ 6) ์ต๊ทผ 3๋ ์ ์ฒด์ต์ ๋ฐ์ดํฐ ์ ๋ ฅ | |
| df_recent = df_valid[(df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.year >= start_year_val) & (df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.year <= end_year_val)].copy() | |
| if df_recent.shape[1] >= 3: | |
| df_recent['์ต์ ์๋ณธ'] = df_recent.iloc[:, 2].astype(str).fillna('') | |
| def get_first_option(opt_str): | |
| return opt_str.split(' / ')[0].strip() | |
| df_recent['์ต์ 1'] = df_recent['์ต์ ์๋ณธ'].apply(get_first_option) | |
| option_counts_3y = df_recent['์ต์ 1'].value_counts() | |
| top20_3y = option_counts_3y.head(20) | |
| sum_others_3y = option_counts_3y.iloc[20:].sum() if len(option_counts_3y) > 20 else 0 | |
| row_opt = 5 | |
| for opt_name, cnt in top20_3y.items(): | |
| ws_dashboard.cell(row=row_opt, column=14, value=opt_name) | |
| ws_dashboard.cell(row=row_opt, column=15, value=cnt) | |
| row_opt += 1 | |
| ws_dashboard.cell(row=25, column=15, value=sum_others_3y) | |
| else: | |
| ws_dashboard.cell(row=5, column=14, value="๊ตฌ๋งค์ต์ ์ด์ด ์์ต๋๋ค.") | |
| ws_dashboard.cell(row=5, column=15, value=0) | |
| # (๋ฏธ์ 7) ์ ํ๋ ๋ ๊ธฐ์ค ์ต์ ๋ฐ์ดํฐ (๋ด๋ฆผ์ฐจ์) | |
| df_selected_year = df_valid[df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.year == selected_year_int].copy() | |
| if df_selected_year.shape[1] >= 3: | |
| df_selected_year['์ต์ ์๋ณธ'] = df_selected_year.iloc[:, 2].astype(str).fillna('') | |
| def get_first_option(opt_str): | |
| return opt_str.split(' / ')[0].strip() | |
| df_selected_year['์ต์ 1'] = df_selected_year['์ต์ ์๋ณธ'].apply(get_first_option) | |
| option_counts_selected = df_selected_year['์ต์ 1'].value_counts() | |
| top20_selected = option_counts_selected.head(20) | |
| sum_others_selected = option_counts_selected.iloc[20:].sum() if len(option_counts_selected) > 20 else 0 | |
| row_opt = 5 | |
| for opt_name, cnt in top20_selected.items(): | |
| ws_dashboard.cell(row=row_opt, column=18, value=opt_name) | |
| ws_dashboard.cell(row=row_opt, column=19, value=cnt) | |
| row_opt += 1 | |
| ws_dashboard.cell(row=25, column=19, value=sum_others_selected) | |
| else: | |
| ws_dashboard.cell(row=5, column=18, value="๊ตฌ๋งค์ต์ ์ด์ด ์์ต๋๋ค.") | |
| ws_dashboard.cell(row=5, column=19, value=0) | |
| # (๋ฏธ์ 8) ์ต๊ทผ3๋ ํ์ ํํฉ - top 20 ์ต์ | |
| df_recent_score = df_valid[(df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.year >= start_year_val) & (df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.year <= end_year_val)].copy() | |
| if df_recent_score.shape[1] >= 5: | |
| df_recent_score['์ต์ ์๋ณธ'] = df_recent_score.iloc[:, 2].astype(str).fillna('') | |
| def get_first_option(opt_str): | |
| return opt_str.split(' / ')[0].strip() | |
| df_recent_score['์ต์ 1'] = df_recent_score['์ต์ ์๋ณธ'].apply(get_first_option) | |
| df_recent_score['ํ์ '] = pd.to_numeric(df_recent_score.iloc[:, 4], errors='coerce') | |
| df_recent_score = df_recent_score.dropna(subset=['ํ์ ']) | |
| group_8 = df_recent_score.groupby(['์ต์ 1', 'ํ์ ']).size().unstack(fill_value=0) | |
| group_8 = group_8.reindex(columns=[5,4,3,2,1], fill_value=0) | |
| group_8['total_count'] = group_8.sum(axis=1) | |
| group_8 = group_8.sort_values(by='total_count', ascending=False) | |
| top20_8 = group_8.head(20).copy() | |
| others_8 = group_8.iloc[20:].copy() | |
| sum_others_5 = others_8[5].sum() | |
| sum_others_4 = others_8[4].sum() | |
| sum_others_3 = others_8[3].sum() | |
| sum_others_2 = others_8[2].sum() | |
| sum_others_1 = others_8[1].sum() | |
| row_v = 5 | |
| for opt_name, row_data in top20_8.iterrows(): | |
| ws_dashboard.cell(row=row_v, column=22, value=opt_name) | |
| ws_dashboard.cell(row=row_v, column=24, value=row_data[5]) | |
| ws_dashboard.cell(row=row_v, column=25, value=row_data[4]) | |
| ws_dashboard.cell(row=row_v, column=26, value=row_data[3]) | |
| ws_dashboard.cell(row=row_v, column=27, value=row_data[2]) | |
| ws_dashboard.cell(row=row_v, column=28, value=row_data[1]) | |
| row_v += 1 | |
| ws_dashboard.cell(row=25, column=24, value=sum_others_5) | |
| ws_dashboard.cell(row=25, column=25, value=sum_others_4) | |
| ws_dashboard.cell(row=25, column=26, value=sum_others_3) | |
| ws_dashboard.cell(row=25, column=27, value=sum_others_2) | |
| ws_dashboard.cell(row=25, column=28, value=sum_others_1) | |
| else: | |
| ws_dashboard.cell(row=5, column=22, value="์ต์ ๋๋ ํ์ ์ด์ด ๋ถ์กฑํฉ๋๋ค.") | |
| # (๋ฏธ์ 9) ์ ํ๋ ๋ ํ์ ํํฉ - top 20 ์ต์ | |
| df_selected_score = df_valid[df_valid['๋ฆฌ๋ทฐ๋ ์ง'].dt.year == selected_year_int].copy() | |
| if df_selected_score.shape[1] >= 5: | |
| df_selected_score['์ต์ ์๋ณธ'] = df_selected_score.iloc[:, 2].astype(str).fillna('') | |
| def get_first_option(opt_str): | |
| return opt_str.split(' / ')[0].strip() | |
| df_selected_score['์ต์ 1'] = df_selected_score['์ต์ ์๋ณธ'].apply(get_first_option) | |
| df_selected_score['ํ์ '] = pd.to_numeric(df_selected_score.iloc[:, 4], errors='coerce') | |
| df_selected_score = df_selected_score.dropna(subset=['ํ์ ']) | |
| group_9 = df_selected_score.groupby(['์ต์ 1', 'ํ์ ']).size().unstack(fill_value=0) | |
| group_9 = group_9.reindex(columns=[5,4,3,2,1], fill_value=0) | |
| group_9['total_count'] = group_9.sum(axis=1) | |
| group_9 = group_9.sort_values(by='total_count', ascending=False) | |
| top20_9 = group_9.head(20).copy() | |
| others_9 = group_9.iloc[20:].copy() | |
| sum_others_5 = others_9[5].sum() | |
| sum_others_4 = others_9[4].sum() | |
| sum_others_3 = others_9[3].sum() | |
| sum_others_2 = others_9[2].sum() | |
| sum_others_1 = others_9[1].sum() | |
| row_ad = 5 | |
| for opt_name, row_data in top20_9.iterrows(): | |
| ws_dashboard.cell(row=row_ad, column=30, value=opt_name) | |
| ws_dashboard.cell(row=row_ad, column=32, value=row_data[5]) | |
| ws_dashboard.cell(row=row_ad, column=33, value=row_data[4]) | |
| ws_dashboard.cell(row=row_ad, column=34, value=row_data[3]) | |
| ws_dashboard.cell(row=row_ad, column=35, value=row_data[2]) | |
| ws_dashboard.cell(row=row_ad, column=36, value=row_data[1]) | |
| row_ad += 1 | |
| ws_dashboard.cell(row=25, column=32, value=sum_others_5) | |
| ws_dashboard.cell(row=25, column=33, value=sum_others_4) | |
| ws_dashboard.cell(row=25, column=34, value=sum_others_3) | |
| ws_dashboard.cell(row=25, column=35, value=sum_others_2) | |
| ws_dashboard.cell(row=25, column=36, value=sum_others_1) | |
| else: | |
| ws_dashboard.cell(row=5, column=30, value="์ต์ ๋๋ ํ์ ์ด์ด ๋ถ์กฑํฉ๋๋ค.") | |
| # ์์ ์์ ํ์ผ ์ ์ฅ | |
| temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") | |
| wb.save(temp_file.name) | |
| temp_file.close() | |
| # ์ ํ๋ ๋ Top20 ์ต์ ๋ฆฌ์คํธ ์์ฑ (์ ์ฒด์ต์ ๋ถ์ ํฌํจ) | |
| top20_option_list = ["์ ์ฒด์ต์ ๋ถ์"] | |
| if df_selected_year.shape[1] >= 3: | |
| option_counts_selected = df_selected_year['์ต์ 1'].value_counts().head(20) | |
| for opt_name, cnt in option_counts_selected.items(): | |
| top20_option_list.append(f"{opt_name}({cnt})") | |
| return temp_file.name, top20_option_list | |
| # ===================== (2) ๋ฆฌ๋ทฐ๋ถ์: ๋ฏธ์ 10~12 ์คํ ===================== | |
| def analyze_reviews(partial_file, selected_option, llm_model_choice): | |
| """ | |
| '๋ฆฌ๋ทฐ์ต์ ๋ถ์' ๊ฒฐ๊ณผ ์์ ๊ณผ ์ ํ ์ต์ (๋๋ ์ ์ฒด์ต์ ๋ถ์)์ ๋ฐํ์ผ๋ก | |
| ๋ฏธ์ 10~12(์ฃผ์๊ธ์ /๋ถ์ ๋ฆฌ๋ทฐ ์ถ์ถ + LLM ๋ถ์)๋ฅผ ์ํํ๊ณ , | |
| ์ต์ข ์์ ํ์ผ๊ณผ ํ ์คํธ ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํํ๋ค. | |
| """ | |
| if partial_file is None: | |
| return None, "", "", "", "" | |
| try: | |
| wb = openpyxl.load_workbook(partial_file) | |
| except Exception as e: | |
| return None, "", "", "", "" | |
| if "์๋ณธ๋ฐ์ดํฐ" not in wb.sheetnames: | |
| return None, "", "", "", "" | |
| ws_source = wb["์๋ณธ๋ฐ์ดํฐ"] | |
| data = ws_source.values | |
| cols = next(data) | |
| df_upload = pd.DataFrame(data, columns=cols) | |
| # (๋ฏธ์ 10) ์ฃผ์ ๋ฆฌ๋ทฐ ๋ด์ฉ ์ํธ ์์ฑ | |
| if "์ฃผ์ ๋ฆฌ๋ทฐ ๋ด์ฉ" in wb.sheetnames: | |
| ws_review = wb["์ฃผ์ ๋ฆฌ๋ทฐ ๋ด์ฉ"] | |
| wb.remove(ws_review) | |
| ws_review = wb.create_sheet("์ฃผ์ ๋ฆฌ๋ทฐ ๋ด์ฉ") | |
| else: | |
| ws_review = wb.create_sheet("์ฃผ์ ๋ฆฌ๋ทฐ ๋ด์ฉ") | |
| if df_upload.shape[1] < 5: | |
| ws_review.cell(row=1, column=1, value="๋ฆฌ๋ทฐ๋ด์ฉ ๋ฐ ํ์ ์ด์ด ๋ถ์กฑํฉ๋๋ค.") | |
| return None, "", "", "", "" | |
| df_upload['์ต์ ์๋ณธ'] = df_upload.iloc[:, 2].astype(str).fillna('') | |
| if selected_option == "์ ์ฒด์ต์ ๋ถ์": | |
| df_filtered = df_upload.copy() | |
| else: | |
| selected_opt_name = selected_option.rsplit("(", 1)[0].strip() | |
| def get_first_option(opt_str): | |
| return opt_str.split(' / ')[0].strip() | |
| df_upload['์ต์ 1'] = df_upload['์ต์ ์๋ณธ'].apply(get_first_option) | |
| df_filtered = df_upload[df_upload['์ต์ 1'] == selected_opt_name].copy() | |
| # (๋ฏธ์ 10) ์ฃผ์ ๋ฆฌ๋ทฐ ๋ด์ฉ(ํ์ =3 ์ ์ธ) + ๊ธ์์ ์ด ์์ฑ โ ๋ด๋ฆผ์ฐจ์ ์ ๋ ฌ | |
| df_review = df_filtered.copy() | |
| df_review = df_review[df_review.iloc[:,4] != 3] | |
| df_review['๊ธ์์'] = df_review.iloc[:,3].astype(str).apply(len) | |
| df_review = df_review.sort_values(by='๊ธ์์', ascending=False) | |
| for r_idx, row in enumerate(dataframe_to_rows(df_review, index=False, header=True), start=1): | |
| for c_idx, value in enumerate(row, start=1): | |
| ws_review.cell(row=r_idx, column=c_idx, value=value) | |
| # (๋ฏธ์ 11) ๊ธ์ /๋ถ์ ๋ฆฌ๋ทฐ ์ถ์ถ | |
| df_positive = df_review[(df_review.iloc[:,4] == 5) & (df_review['๊ธ์์'] < 500)].copy() | |
| if df_positive.shape[0] < 20: | |
| df_positive_4 = df_review[(df_review.iloc[:,4] == 4) & (df_review['๊ธ์์'] < 500)] | |
| df_positive = pd.concat([df_positive, df_positive_4]) | |
| df_positive = df_positive.head(20) | |
| positive_reviews = "" | |
| for idx, row in df_positive.iterrows(): | |
| positive_reviews += f"์์ด๋: {row.iloc[0]}, ์ ์: {row.iloc[4]}, ๊ธ์์: {row['๊ธ์์']}\n๋ฆฌ๋ทฐ: {row.iloc[3]}\n\n" | |
| df_negative = df_review[(df_review.iloc[:,4] == 1) & (df_review['๊ธ์์'] < 500)].copy() | |
| if df_negative.shape[0] < 30: | |
| df_negative_2 = df_review[(df_review.iloc[:,4] == 2) & (df_review['๊ธ์์'] < 500)] | |
| df_negative = pd.concat([df_negative, df_negative_2]) | |
| df_negative = df_negative.head(30) | |
| negative_reviews = "" | |
| for idx, row in df_negative.iterrows(): | |
| negative_reviews += f"์์ด๋: {row.iloc[0]}, ์ ์: {row.iloc[4]}, ๊ธ์์: {row['๊ธ์์']}\n๋ฆฌ๋ทฐ: {row.iloc[3]}\n\n" | |
| # (๋ฏธ์ 12) LLM์ ์ด์ฉํ ๋ฆฌ๋ทฐ ๋ถ์ | |
| positive_system_msg = ( | |
| "๐ ๊ธ์ ๋ฆฌ๋ทฐ ๋ถ์:\n" | |
| "๋๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ถ์ํ๋ ๋น ๋ฐ์ดํฐ ๋ถ์๊ฐ์ด๋ค. ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ฐํ์ผ๋ก ๊ธ์ ์ ์ธ ์๊ฒฌ๋ง์ ๋ถ์ํด๋ผ. ๋ฐ๋์ ์ ๊ณต๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์์๋ง ๋ถ์ํ๋ฉฐ, ๋์ ์๊ฐ์ ํฌํจํ์ง ๋ง ๊ฒ.\n" | |
| "[๋ถ์ ์กฐ๊ฑด]\n" | |
| "- ๋ฐ๋์ ๊ธ์ ์ ์ธ ์๊ฒฌ๋ง ๋ถ์ํ๊ณ , ๋ถ์ ์ ์ธ ์๊ฒฌ์ ์ ์ธํ ๊ฒ.\n" | |
| "- ๊ธฐ๋ฅ ๋ฐ ์ฑ๋ฅ, ๊ฐ์ฑ, ์ค์ ์ฌ์ฉ, ๋ฐฐ์ก, ํ๊ฒ๋ณ ๊ด์ ์ผ๋ก ๋ถ์ํ ๊ฒ.\n" | |
| "- ๋ง์ผํ ์ ํ์ฉํ ์ ์๋ ๊ณ ๊ฐ์ ์ค์ ๋ฆฌ๋ทฐ ๋จ์ด๋ฅผ ๋ฐ๋์ ํฌํจํ ๊ฒ.\n" | |
| "[์ถ๋ ฅ ํํ]\n" | |
| "- ๊ฐ๊ฐ์ ์ ๋ชฉ ์์๋ '๐' ์ด๋ชจ์ง๋ฅผ ์ฌ์ฉํ๋ฉฐ, '#'๋ '##'์ ์ฌ์ฉํ์ง ๋ง ๊ฒ.\n" | |
| "- ๊ฐ์ฅ ๋ง์ง๋ง์๋ \"๐์ข ํฉ์๊ฒฌ\"์ด๋ผ๋ ์ ๋ชฉ์ผ๋ก ์ข ํฉ ์๊ฒฌ์ ์์ฑํ๋ผ.\n" | |
| " - ์ข ํฉ์๊ฒฌ์๋ ํญ๋ชฉ๋ณ ์ ๋ชฉ์ ์ ์ธํ๊ณ ์์ ์ ๋ฌธ์ฅ์ผ๋ก ์์ฑํ ๊ฒ.\n" | |
| " - ์ด์ด์ '๐น ๊ฐ์ '๊ณผ '๐น ๊ธฐํ' ์ ๋ชฉ์ผ๋ก SWOT ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ์ ๊ณตํ ๊ฒ.\n" | |
| "- ์ค์ ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ์์ ์ฌ์ฉ๋ ๋จ์ด๋ฅผ ๋ฐ๋์ ํฌํจํ ๊ฒ." | |
| ) | |
| positive_user_msg = "๋ค์ ๋ฆฌ๋ทฐ๋ฅผ ๋ถ์ํด ์ฃผ์ธ์:\n" + positive_reviews | |
| negative_system_msg = ( | |
| "๐ ๋ถ์ ๋ฆฌ๋ทฐ ๋ถ์:\n" | |
| "๋๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ถ์ํ๋ ๋น ๋ฐ์ดํฐ ๋ถ์๊ฐ์ด๋ค. ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ฐํ์ผ๋ก ๋ถ์ ์ ์ธ ์๊ฒฌ๋ง์ ๋ถ์ํด๋ผ. ๋ฐ๋์ ์ ๊ณต๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์์๋ง ๋ถ์ํ๋ฉฐ, ๋์ ์๊ฐ์ ํฌํจํ์ง ๋ง ๊ฒ.\n" | |
| "[๋ถ์ ์กฐ๊ฑด]\n" | |
| "- ๋ถ์ ์ ์ธ ์๊ฒฌ๋ง ๋ถ์ํ๊ณ , ๊ธ์ ์ ์ธ ์๊ฒฌ์ ์ ์ธํ ๊ฒ.\n" | |
| "- ๊ธฐ๋ฅ ๋ฐ ์ฑ๋ฅ, ๊ฐ์ฑ, ์ค์ ์ฌ์ฉ, ๋ฐฐ์ก, ๊ณ ๊ฐ์ ๋ถ๋ ธ ๊ด์ ์ผ๋ก ๋ถ์ํ ๊ฒ.\n" | |
| "- ๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํ์ผ๋ก '๊ฐ์ ํ ์ '์ ์ถ๋ ฅํ ๊ฒ.\n" | |
| "[์ถ๋ ฅ ํํ]\n" | |
| "- ๊ฐ๊ฐ์ ์ ๋ชฉ ์์๋ '๐' ์ด๋ชจ์ง๋ฅผ ์ฌ์ฉํ๋ฉฐ, '#'๋ '##'์ ์ฌ์ฉํ์ง ๋ง ๊ฒ.\n" | |
| "- ๊ฐ์ฅ ๋ง์ง๋ง์๋ \"๐ข๊ฐ์ ํ ์ \"์ด๋ผ๋ ์ ๋ชฉ์ผ๋ก ๊ฐ์ ํ ์ ์ ์์ฑํ๋ผ.\n" | |
| " - ๊ฐ์ ํ ์ ์๋ ํญ๋ชฉ๋ณ ์ ๋ชฉ์ ์ ์ธํ๊ณ ์์ ์ ๋ฌธ์ฅ์ผ๋ก ์์ฑํ ๊ฒ.\n" | |
| " - ์ด์ด์ '๐ ์ฝ์ '๊ณผ '๐ ์ํ' ์ ๋ชฉ์ผ๋ก SWOT ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ์ ๊ณตํ ๊ฒ.\n" | |
| "- ์ค์ ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ์์ ์ฌ์ฉ๋ ๋จ์ด๋ฅผ ๋ฐ๋์ ํฌํจํ ๊ฒ." | |
| ) | |
| negative_user_msg = "๋ค์ ๋ฆฌ๋ทฐ๋ฅผ ๋ถ์ํด ์ฃผ์ธ์:\n" + negative_reviews | |
| if llm_model_choice == "ChatGPT (gpt-4o-mini)": | |
| positive_analysis = call_api( | |
| positive_user_msg, positive_system_msg, | |
| max_tokens=15000, temperature=0.3, top_p=0.95 | |
| ) | |
| negative_analysis = call_api( | |
| negative_user_msg, negative_system_msg, | |
| max_tokens=15000, temperature=0.3, top_p=0.95 | |
| ) | |
| elif llm_model_choice == "Gemini Flash (gemini-2.0-flash)": | |
| positive_analysis = respond_gemini_qna( | |
| positive_user_msg, positive_system_msg, | |
| max_tokens=15000, temperature=0.3, top_p=0.95, | |
| model_id="gemini-2.0-flash" | |
| ) | |
| negative_analysis = respond_gemini_qna( | |
| negative_user_msg, negative_system_msg, | |
| max_tokens=15000, temperature=0.3, top_p=0.95, | |
| model_id="gemini-2.0-flash" | |
| ) | |
| elif llm_model_choice == "o1-mini": | |
| positive_analysis = respond_o1mini_qna( | |
| positive_user_msg, positive_system_msg, | |
| max_tokens=15000, temperature=1 | |
| ) | |
| negative_analysis = respond_o1mini_qna( | |
| negative_user_msg, negative_system_msg, | |
| max_tokens=15000, temperature=1 | |
| ) | |
| else: | |
| positive_analysis = "LLM ๋ชจ๋ธ ์ ํ ์ค๋ฅ" | |
| negative_analysis = "LLM ๋ชจ๋ธ ์ ํ ์ค๋ฅ" | |
| if "๋์๋ณด๋๋ฐ์ดํฐ" in wb.sheetnames: | |
| ws_dashboard = wb["๋์๋ณด๋๋ฐ์ดํฐ"] | |
| ws_dashboard["AL5"] = positive_analysis | |
| ws_dashboard["AM5"] = negative_analysis | |
| final_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") | |
| wb.save(final_file.name) | |
| final_file.close() | |
| return final_file.name, positive_reviews, negative_reviews, positive_analysis, negative_analysis | |
| # ===================== Gradio UI ๊ตฌ์ฑ (HTML/CSS ์ปค์คํฐ๋ง์ด์ง ์ ์ฉ) ===================== | |
| custom_css = """ | |
| body { | |
| background: #f7f7f7; | |
| font-family: 'Arial', sans-serif; | |
| } | |
| .gradio-container { | |
| padding: 20px; | |
| } | |
| .custom-header { | |
| text-align: center; | |
| font-size: 36px; | |
| font-weight: bold; | |
| margin-bottom: 20px; | |
| color: #333; | |
| } | |
| .custom-frame { | |
| border: 2px solid #ccc; | |
| border-radius: 20px; | |
| padding: 20px; | |
| margin: 10px 0; | |
| background-color: #fff; | |
| } | |
| .custom-button { | |
| border-radius: 20px !important; | |
| background: #4caf50 !important; | |
| color: white !important; | |
| font-size: 20px !important; | |
| padding: 10px 20px !important; | |
| } | |
| .custom-title { | |
| font-size: 28px; | |
| font-weight: bold; | |
| margin-bottom: 10px; | |
| color: #555; | |
| } | |
| .custom-subtitle { | |
| font-size: 22px; | |
| font-weight: bold; | |
| margin-bottom: 8px; | |
| color: #666; | |
| } | |
| .instructions { | |
| background-color: #e0f7fa; | |
| border: 2px dashed #00796b; | |
| border-radius: 15px; | |
| padding: 15px; | |
| font-size: 18px; | |
| color: #004d40; | |
| margin-bottom: 20px; | |
| text-align: left; | |
| } | |
| """ | |
| # ๊ฐ๋จํ๊ณ ์ข์ธก ์ ๋ ฌ๋ ๋จ๊ณ๋ณ ์ฌ์ฉ์ค๋ช ์ (๋จผ์ ์ ์) | |
| usage_instructions = """ | |
| <div class="instructions"> | |
| <p style="font-size:24px; font-weight:bold;">๐ ์ฌ์ฉ์ค๋ช ์</p> | |
| <p style="font-size:16px;"> | |
| 1๋จ๊ณ: ์ข์ธก "๐ ๋ฐ์ดํฐ ์ ๋ ฅ"์์ ํ์ผ ์ ๋ก๋์ ๋ถ์๋ ๋ ์ ํ ํ, ๋ฆฌ๋ทฐ์ต์ ๋ถ์ ๋ฒํผ ํด๋ฆญ<br> | |
| 2๋จ๊ณ: ํ๋จ "๐ ๋ฆฌ๋ทฐ๋ถ์ ์ต์ ์ ํ"์์ LLM ๋ชจ๋ธ๊ณผ ์์ดํ ์ต์ ์ ํ ํ, ๋ฆฌ๋ทฐ๋ถ์ ์ต์ ์ ํ ๋ฒํผ ํด๋ฆญ<br> | |
| 3๋จ๊ณ: ์ฐ์ธก "๐๋ถ์๋ณด๊ณ ์ ๋ค์ด๋ก๋"์์ ๋ณด๊ณ ์๋ฅผ ๋ค์ด๋ก๋ํ๊ณ ๊ฒฐ๊ณผ ํ์ธ<br> | |
| </p> | |
| </div> | |
| """ | |
| with gr.Blocks(css=custom_css, title="๋ฆฌ๋ทฐ ๋ถ์ ์๋น์ค") as demo: | |
| gr.HTML("<div class='custom-header'>๐ ๊ณ ๊ฐ ๋ฆฌ๋ทฐ ๋ถ์ ์๋น์ค ๐</div>") | |
| gr.HTML(usage_instructions) | |
| # [๋ฐ์ดํฐ ์ ๋ ฅ ๋ฐ ๋ถ์๋ณด๊ณ ์ ๋ค์ด๋ก๋ ํ๋ ์] (์ข/์ฐ ๋ฐฐ์น) | |
| with gr.Row(): | |
| with gr.Column(elem_classes="custom-frame"): | |
| gr.HTML("<div class='custom-title'>๐ ๋ฐ์ดํฐ ์ ๋ ฅ</div>") | |
| file_input = gr.File(label="์๋ณธ ์์ ํ์ผ ์ ๋ก๋", file_types=[".xlsx"]) | |
| year_radio = gr.Radio( | |
| choices=[f"{str(y)[-2:]}๋ " for y in range(datetime.datetime.now().year, datetime.datetime.now().year-5, -1)], | |
| label="๋ถ์๋ ๋ ์ ํ", | |
| value=f"{str(datetime.datetime.now().year)[-2:]}๋ " | |
| ) | |
| analyze_button = gr.Button("๋ฆฌ๋ทฐ์ต์ ๋ถ์", elem_classes="custom-button") | |
| with gr.Column(elem_classes="custom-frame"): | |
| gr.HTML("<div class='custom-title'>๐๋ถ์๋ณด๊ณ ์ ๋ค์ด๋ก๋</div>") | |
| download_final_output = gr.File(label="๋ณด๊ณ ์ ๋ค์ด๋ก๋") | |
| # [๋ฆฌ๋ทฐ๋ถ์ ํ๋ ์] (๋ฐ์ดํฐ ์ ๋ ฅ ํ๋ ์๊ณผ ๋ณ๋; ์ด๊ธฐ์๋ ์จ๊น) | |
| with gr.Column(elem_classes="custom-frame", visible=False) as review_analysis_frame: | |
| gr.HTML("<div class='custom-title'>๋ฆฌ๋ทฐ๋ถ์ ์ต์ ์ ํ</div>") | |
| llm_model_radio = gr.Radio( | |
| choices=["ChatGPT (gpt-4o-mini)", "Gemini Flash (gemini-2.0-flash)", "o1-mini"], | |
| label="LLM ๋ชจ๋ธ ์ ํ", | |
| value="ChatGPT (gpt-4o-mini)" | |
| ) | |
| top20_dropdown = gr.Dropdown( | |
| label="์์ดํ ์ต์ ๋ถ์", | |
| choices=["์ ์ฒด์ต์ ๋ถ์"], | |
| value="์ ์ฒด์ต์ ๋ถ์" | |
| ) | |
| review_button = gr.Button("๐ ๋ฆฌ๋ทฐ๋ถ์ ์ต์ ์ ํ", elem_classes="custom-button") | |
| # [๋ถ์ ๊ฒฐ๊ณผ ํ๋ ์] - ์๋จ ์ ๋ชฉ ์ถ๊ฐ: "๐ ์ต์ ๋ณ ๋ฆฌ๋ทฐ๋ถ์" | |
| with gr.Column(elem_classes="custom-frame"): | |
| gr.HTML("<div class='custom-title'>๐ ์ต์ ๋ณ ๋ฆฌ๋ทฐ๋ถ์</div>") | |
| with gr.Row(): | |
| with gr.Column(elem_classes="custom-frame"): | |
| gr.HTML("<div class='custom-subtitle'>โจ ์ฃผ์๊ธ์ ๋ฆฌ๋ทฐ</div>") | |
| positive_output = gr.Textbox(label="๊ธ์ ๋ฆฌ๋ทฐ๋ฆฌ์คํธ(20๊ฐ)", lines=10) | |
| with gr.Column(elem_classes="custom-frame"): | |
| gr.HTML("<div class='custom-subtitle'>โจ ์ฃผ์๋ถ์ ๋ฆฌ๋ทฐ</div>") | |
| negative_output = gr.Textbox(label="๋ถ์ ๋ฆฌ๋ทฐ๋ฆฌ์คํธ(30๊ฐ)", lines=10) | |
| with gr.Row(): | |
| with gr.Column(elem_classes="custom-frame"): | |
| gr.HTML("<div class='custom-subtitle'>๐ข ๊ธ์ ๋ฆฌ๋ทฐ ๋ถ์</div>") | |
| positive_analysis_output = gr.Textbox(label="๊ธ์ ๋ฆฌ๋ทฐ ๋ถ์", lines=8) | |
| with gr.Column(elem_classes="custom-frame"): | |
| gr.HTML("<div class='custom-subtitle'>๐ข ๋ถ์ ๋ฆฌ๋ทฐ ๋ถ์</div>") | |
| negative_analysis_output = gr.Textbox(label="๋ถ์ ๋ฆฌ๋ทฐ ๋ถ์", lines=8) | |
| # hidden state: ๋ฆฌ๋ทฐ์ต์ ๋ถ์ ๊ฒฐ๊ณผ ์์ ํ์ผ ์ ์ฅ | |
| partial_file_state = gr.State() | |
| # [๋ฐ์ดํฐ ์ ๋ ฅ] - ๋ฆฌ๋ทฐ์ต์ ๋ถ์ ๋ฒํผ ํด๋ฆญ ์ ์คํ: | |
| # ๋ฆฌ๋ทฐ์ต์ ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ์ ์ฅํ๊ณ , ๋ฆฌ๋ทฐ๋ถ์ ํ๋ ์์ ๋ณด์ด๊ฒ ํ๋ฉฐ ์์ดํ ์ต์ ๋ถ์ ๋๋กญ๋ค์ด ์ ๋ฐ์ดํธ | |
| def on_click_analyze_options(uploaded_file, selected_year): | |
| partial_file, top20_list = analyze_options(uploaded_file, selected_year, "ChatGPT (gpt-4o-mini)") | |
| return partial_file, gr.update(visible=True), gr.update(choices=top20_list, value="์ ์ฒด์ต์ ๋ถ์") | |
| analyze_button.click( | |
| fn=on_click_analyze_options, | |
| inputs=[file_input, year_radio], | |
| outputs=[partial_file_state, review_analysis_frame, top20_dropdown] | |
| ) | |
| # [๋ฆฌ๋ทฐ๋ถ์] - ๋ฆฌ๋ทฐ๋ถ์ ๋ฒํผ ํด๋ฆญ ์ ์คํ | |
| def on_click_analyze_reviews(partial_file, selected_option, llm_model): | |
| final_file, pos_reviews, neg_reviews, pos_analysis, neg_analysis = analyze_reviews( | |
| partial_file, selected_option, llm_model | |
| ) | |
| return final_file, pos_reviews, neg_reviews, pos_analysis, neg_analysis | |
| review_button.click( | |
| fn=on_click_analyze_reviews, | |
| inputs=[partial_file_state, top20_dropdown, llm_model_radio], | |
| outputs=[download_final_output, positive_output, negative_output, positive_analysis_output, negative_analysis_output] | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |