alexander-lazarin committed on
Commit
39cdc3e
·
1 Parent(s): c860e9d

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +279 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import csv
4
+ import tempfile
5
+ from io import StringIO
6
+ from clickhouse_driver import Client
7
+ from clickhouse_driver.errors import Error as ClickHouseError
8
+ import gradio as gr
9
+
10
# Resolve the ClickHouse password: use the Google Colab secret store when the
# `google.colab` package is importable, otherwise fall back to the environment.
try:
    from google.colab import userdata
    DB_PASSWORD = userdata.get('FASHION_CH_PASS')
except Exception:
    # `Exception` instead of a bare `except:` so KeyboardInterrupt/SystemExit
    # are not swallowed. Any ordinary failure above (missing package, secret
    # lookup error) still falls back to the environment variable; if that is
    # missing too, the KeyError propagates and the app fails at start-up.
    DB_PASSWORD = os.environ['FASHION_CH_PASS']
15
+
16
class DBConnectionManager:
    """Lazily create, reuse and periodically refresh a single ClickHouse client.

    The client is created on first use and replaced once it is older than
    ``max_connection_age`` seconds. Queries that fail with a ClickHouse driver
    error are retried on a freshly created client.

    NOTE(review): one client instance is shared by every caller of this
    manager. Confirm that callers never run queries concurrently, or guard
    access, since the driver documentation should be checked for thread-safety.
    """

    def __init__(self, max_connection_age=3600):  # 1 hour max connection age
        """Store the maximum client age (seconds); no connection is opened here."""
        self.client = None
        self.last_connection_time = None
        self.max_connection_age = max_connection_age

    def get_connection(self):
        """Return the current client, creating or refreshing it when needed."""
        if self.client is None or self._should_refresh_connection():
            self._create_connection()
        return self.client

    def _create_connection(self):
        """Drop any existing client and build a new one, recording its creation time."""
        if self.client:
            self.close_connection()

        self.client = Client(
            host='rc1d-a93v7vf0pjfr6e2o.mdb.yandexcloud.net',
            port=9440,
            user='user1',
            password=DB_PASSWORD,
            database='db1',
            secure=True,
            ca_certs='./RootCA.pem'
        )
        self.last_connection_time = time.time()

    def _should_refresh_connection(self):
        """Return True when no client was ever created or it exceeded the max age."""
        if self.last_connection_time is None:
            return True
        return (time.time() - self.last_connection_time) > self.max_connection_age

    def execute_query(self, query, params=None, max_retries=3):
        """Run ``query`` with ``params`` and return the driver's result.

        Args:
            query: SQL text, optionally with ``%(name)s`` placeholders.
            params: Mapping of placeholder values, or None.
            max_retries: Total number of attempts; values below 1 are treated
                as 1 so the query is always tried at least once.

        Raises:
            ClickHouseError: re-raised from the last attempt when every
                attempt failed.
        """
        attempts = max(1, max_retries)
        for attempt in range(attempts):
            try:
                connection = self.get_connection()
                return connection.execute(query, params)
            except ClickHouseError as e:
                print(f"Database error on attempt {attempt + 1}: {e}")
                if attempt < attempts - 1:
                    print("Retrying...")
                    self._create_connection()  # Force a new connection
                else:
                    raise

    def close_connection(self):
        """Disconnect the client (if any) and reset the manager's state.

        The state is reset even when ``disconnect()`` raises, so a broken
        client is never reused by a later ``get_connection()`` call.
        """
        if self.client:
            try:
                self.client.disconnect()
            finally:
                self.client = None
                self.last_connection_time = None
66
+
67
+ db_manager = DBConnectionManager()
68
+
69
def get_category_a_options():
    """Return the distinct ``category_a`` values of the datamart, in query order.

    The query sorts by ``category_a``; each result row contributes its first
    (only) column to the returned list.
    """
    rows = db_manager.execute_query(
        "SELECT DISTINCT category_a FROM datamart_lamoda_grownups_mc_4 ORDER BY category_a"
    )
    return [record[0] for record in rows]
73
+
74
def update_category_3(category_a):
    """Return a "Category 3" checkbox group limited to the selected ``category_a`` values.

    Args:
        category_a: Collection of selected ``category_a`` values.

    Returns:
        A ``gr.CheckboxGroup`` whose choices are the distinct ``category_3``
        values found for the selection. The choices are empty when nothing is
        selected or when the lookup fails (the error is printed, not raised).
    """
    print("entering update_category_3")
    if not category_a:
        # Nothing selected: there is nothing to look up, so skip the database
        # round trip instead of sending an empty IN list.
        return gr.CheckboxGroup(choices=[], label="Category 3", interactive=True)
    try:
        query = """
            SELECT DISTINCT category_3
            FROM datamart_lamoda_grownups_mc_4
            WHERE category_a in %(category_a)s
            ORDER BY category_3
        """
        result = db_manager.execute_query(query, {'category_a': category_a})
        category_3_options = [row[0] for row in result]
        return gr.CheckboxGroup(choices=category_3_options, label="Category 3", interactive=True)
    except Exception as e:
        # Best effort: a failed lookup must not break the UI, so fall back to
        # an empty group.
        print(f"Error updating category 3: {e}")
        return gr.CheckboxGroup(choices=[], label="Category 3", interactive=True)
89
+
90
def update_category_4(category_a, category_3):
    """Return a "Category 4" checkbox group limited to the selected parent categories.

    Args:
        category_a: Collection of selected ``category_a`` values.
        category_3: Collection of selected ``category_3`` values.

    Returns:
        A ``gr.CheckboxGroup`` whose choices are the distinct ``category_4``
        values matching both selections. The choices are empty when either
        selection is empty or when the lookup fails (the error is printed,
        not raised).
    """
    print("entering update_category_4")
    if not category_a or not category_3:
        # Either filter is empty, so no row can match: skip the database
        # round trip instead of sending an empty IN list.
        return gr.CheckboxGroup(choices=[], label="Category 4", interactive=True)
    try:
        query = """
            SELECT DISTINCT category_4
            FROM datamart_lamoda_grownups_mc_4
            WHERE 1=1
            and category_a in %(category_a)s
            and category_3 in %(category_3)s
            ORDER BY category_4
        """
        result = db_manager.execute_query(query, {'category_a': category_a, 'category_3': category_3})
        category_4_options = [row[0] for row in result]
        return gr.CheckboxGroup(choices=category_4_options, label="Category 4", interactive=True)
    except Exception as e:
        # Best effort: a failed lookup must not break the UI, so fall back to
        # an empty group.
        print(f"Error updating category 4: {e}")
        return gr.CheckboxGroup(choices=[], label="Category 4", interactive=True)
107
+
108
def generate_csv_report(category_a, category_3, category_4):
    """Run the aggregated report query and write the result to a temporary CSV file.

    The query restricts the datamart to the 'Medium', 'High' and 'Premium'
    price tiers and to the selected categories, assigns every SKU to a price
    group derived from the category's median SKU price, and aggregates
    turnover, sales, SKU counts and related ratios per year, category triple,
    brand and price group.

    Args:
        category_a: Collection of selected ``category_a`` values.
        category_3: Collection of selected ``category_3`` values.
        category_4: Collection of selected ``category_4`` values.

    Returns:
        Path of the written ``.csv`` file (created with ``delete=False``, so
        the caller owns its removal), or None when the query or the write
        failed (the error is printed, not raised).
    """
    # CTEs:
    #   sku_prices    - average of avg_price per SKU within the filters
    #   median_prices - median of those per-SKU averages per category triple
    #   main          - datamart rows joined with both; `step` is 20% of the
    #                   median price rounded to a multiple of 500 and
    #                   `price_group` is the SKU price rounded to that step
    # Every ratio divides by IF(x=0, NULL, x) so a zero denominator yields NULL.
    # For sales_through_rate the zero check uses the same expression as the
    # divisor (previously it tested max(initial_stock) while dividing by
    # sumIf(initial_stock, is_first_week=1), which could still divide by zero).
    query = """
        WITH
        sku_prices AS (
            SELECT
                id_product_money,
                category_a,
                category_3,
                category_4,
                avg(avg_price) AS sku_avg_price
            FROM datamart_lamoda_grownups_mc_4
            WHERE price_tier in ('Medium', 'High', 'Premium')
                AND category_a IN %(category_a)s
                AND category_3 IN %(category_3)s
                AND category_4 IN %(category_4)s
            GROUP BY all
        ),
        median_prices AS (
            SELECT
                category_a,
                category_3,
                category_4,
                median(sku_avg_price) AS median_price
            FROM sku_prices
            GROUP BY all
        ),
        main AS(
            SELECT
                d.category_a AS category_a,
                d.category_3 AS category_3,
                d.category_4 AS category_4,
                500*round(0.2*median_price/500) AS step,
                step*round(sku_avg_price/step) AS price_group,
                turnover,
                total_sales,
                d.id_product_money AS id_product_money,
                avg_price,
                season,
                brand_rank,
                stock_increase,
                initial_stock,
                days_in_stock,
                total_days_in_period,
                median_price,
                brand_name,
                is_first_week,
                week_start_date
            FROM datamart_lamoda_grownups_mc_4 d
            LEFT JOIN median_prices m
                USING (category_a, category_3, category_4)
            LEFT JOIN sku_prices s
                ON d.id_product_money = s.id_product_money
            WHERE price_tier in ('Medium', 'High', 'Premium')
                AND d.category_a IN %(category_a)s
                AND d.category_3 IN %(category_3)s
                AND d.category_4 IN %(category_4)s
        )
        SELECT
            year(week_start_date)::text AS YEAR,
            min(week_start_date)::text AS min_week_start_date,
            max(week_start_date)::text AS max_week_start_date,
            category_a,
            category_3,
            category_4,
            brand_name,
            price_group,
            sum(turnover) as sum_turnover,
            sum(total_sales) as count_sales,
            count(distinct id_product_money) as count_sku,
            round(avg(avg_price)) as avg_price1,
            round(sum_turnover/IF(count_sales=0,NULL,count_sales)) as avg_price2,
            uniqExactIf(id_product_money, season = 'AW 23') as count_sku_aw_23,
            uniqExactIf(id_product_money, season = 'SS 24') as count_sku_ss_24,
            sumIf(turnover, season = 'AW 23') as sum_turnover_aw_23,
            sumIf(turnover, season = 'SS 24') as sum_turnover_ss_24,
            round(sum_turnover/IF(count_sku=0,NULL,count_sku)) as turnover_per_sku,
            round(sum_turnover_aw_23/IF(count_sku_aw_23=0,NULL,count_sku_aw_23)) as turnover_per_sku_aw_23,
            round(sum_turnover_ss_24/IF(count_sku_ss_24=0,NULL,count_sku_ss_24)) as turnover_per_sku_ss_24,
            round(1.0*sumIf(turnover, brand_rank <= 10)/IF(sum_turnover=0,NULL,sum_turnover), 3) as top10_turnover_share,
            round(1.0*sumIf(turnover, brand_name = 'BLCV')/IF(sum_turnover=0,NULL,sum_turnover), 3) as blcv_turnover_share,
            round(1.0*count_sales/IF((sum(stock_increase) + sumIf(initial_stock, is_first_week=1))=0,NULL,(sum(stock_increase) + sumIf(initial_stock, is_first_week=1))), 3) as sales_through_rate,
            round(1.0*uniqExactIf(id_product_money, total_sales > 0)/IF(count_sku=0,NULL,count_sku), 3) as sold_sku_share,
            round(1.0*sum(days_in_stock)/IF(sum(total_days_in_period)=0,NULL,sum(total_days_in_period)), 3) as availability_index,
            max(median_price) AS median_price
        FROM main
        GROUP BY all
        ORDER BY all
    """

    params = {
        'category_a': category_a,
        'category_3': category_3,
        'category_4': category_4
    }

    # Column names, in the same order as the final SELECT above.
    header = [
        "YEAR", "min_week_start_date", "max_week_start_date", "category_a", "category_3", "category_4",
        "brand_name", "price_group", "sum_turnover", "count_sales", "count_sku", "avg_price1",
        "avg_price2", "count_sku_aw_23", "count_sku_ss_24", "sum_turnover_aw_23", "sum_turnover_ss_24",
        "turnover_per_sku", "turnover_per_sku_aw_23", "turnover_per_sku_ss_24", "top10_turnover_share",
        "blcv_turnover_share", "sales_through_rate", "sold_sku_share", "availability_index", "median_price"
    ]

    temp_path = None
    try:
        print('trying to run the query')
        result = db_manager.execute_query(query, params)

        # Write the report to a named temp file that survives the `with`
        # block (delete=False). newline='' is what the csv module requires of
        # its file objects; without it rows get extra blank lines on
        # platforms that translate line endings.
        with tempfile.NamedTemporaryFile(
            mode='w+', delete=False, suffix='.csv', encoding='utf-8', newline=''
        ) as temp_file:
            temp_path = temp_file.name
            csv_writer = csv.writer(temp_file)
            csv_writer.writerow(header)
            csv_writer.writerows(result)

        return temp_path
    except Exception as e:
        print(f"Error generating CSV report: {e}")
        # Do not leave a half-written report behind.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return None
228
+
229
def download_csv(category_a, category_3, category_4):
    """Produce the CSV report for the given selections and return its path.

    Raises:
        gr.Error: when ``generate_csv_report`` returns a falsy value.
    """
    report_path = generate_csv_report(category_a, category_3, category_4)
    if not report_path:
        raise gr.Error("Error generating CSV report. Please try again.")
    return report_path
235
+
236
def interface():
    """Build and return the Gradio Blocks app.

    The layout has three cascading checkbox groups (Category A, Category 3,
    Category 4), a button and a file output. Changing Category A refreshes
    the Category 3 choices, changing Category 3 refreshes the Category 4
    choices, and the button produces the CSV report for the current selection.
    """
    with gr.Blocks() as app:
        # The top-level choices are loaded once, while the UI is being built.
        top_level_choices = get_category_a_options()

        cat_a_box = gr.CheckboxGroup(choices=top_level_choices, label="Category A")
        cat_3_box = gr.CheckboxGroup(choices=[], label="Category 3", interactive=True)
        cat_4_box = gr.CheckboxGroup(choices=[], label="Category 4", interactive=True)
        report_button = gr.Button("Download CSV Report")
        report_file = gr.File(label="CSV Report")

        # Cascade: A -> 3 -> 4.
        cat_a_box.change(fn=update_category_3, inputs=[cat_a_box], outputs=[cat_3_box])
        cat_3_box.change(fn=update_category_4, inputs=[cat_a_box, cat_3_box], outputs=[cat_4_box])

        report_button.click(
            fn=download_csv,
            inputs=[cat_a_box, cat_3_box, cat_4_box],
            outputs=[report_file],
        )

    return app
263
+
264
+
265
def cleanup_temp_files():
    """Delete leftover temporary ``.csv`` files from the system temp directory.

    Only regular files whose name starts with the ``tempfile`` default prefix
    and ends with ``.csv`` are removed, i.e. names of the shape produced by
    ``tempfile.NamedTemporaryFile(suffix='.csv')``. Previously every ``*.csv``
    in the shared temp directory was deleted, including files this app never
    created.

    NOTE(review): files with the same naming pattern created by other
    programs are still removed; track the generated paths explicitly if that
    matters.

    Deletion failures are printed and do not stop the cleanup.
    """
    temp_dir = tempfile.gettempdir()
    prefix = tempfile.gettempprefix()
    for filename in os.listdir(temp_dir):
        if not (filename.startswith(prefix) and filename.endswith('.csv')):
            continue
        file_path = os.path.join(temp_dir, filename)
        if not os.path.isfile(file_path):
            # Skip directories and other non-regular entries.
            continue
        try:
            os.remove(file_path)
        except OSError as e:
            print(f"Error deleting temporary file {file_path}: {e}")
274
+
275
# Script entry point: build the UI, serve it, and always release resources.
if __name__ == "__main__":
    demo = interface()
    try:
        demo.launch(debug=True)
    finally:
        # Run the shutdown steps even when launch() raises or is interrupted;
        # the nested finally keeps temp-file cleanup from being skipped if
        # closing the connection fails.
        try:
            db_manager.close_connection()  # Close the connection when the app exits
        finally:
            cleanup_temp_files()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ clickhouse-driver