ranggafermata committed on
Commit c55ae80 · verified · 1 Parent(s): f1d453e

Update backend/app.py

Files changed (1)
  1. backend/app.py +191 -190
backend/app.py CHANGED
@@ -1,191 +1,192 @@
- from flask import Flask, request, Response, jsonify
- from flask_cors import CORS
- from PIL import Image
- import torch
- from transformers import AutoProcessor, BlipForConditionalGeneration
- from llama_cpp import Llama
- import json
- from tavily import TavilyClient
- import os
- from dotenv import load_dotenv
-
- load_dotenv()
-
- TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
-
- app = Flask(__name__)
- CORS(app)
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- print(f"Using device: {device}")
-
- # --- Load Models ---
- try:
-     vision_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
-     vision_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
-     print("BLIP Vision model loaded successfully.")
- except Exception as e:
-     print(f"Error loading Vision model: {e}")
-     vision_model = None
-
- try:
-     llm = Llama.from_pretrained(
-         repo_id="ranggafermata/Effort-1",
-         filename="EffortQ43B.gguf",
-         n_ctx=2048,
-         n_gpu_layers=-1,
-         verbose=False,
-         chat_format="llama-3"  # Use the standard Llama 3 chat format
-     )
-     print("Effort 1 model loaded successfully.")
- except Exception as e:
-     print(f"Error loading Effort 1 model: {e}")
-     llm = None
-
- # Load Endeavor preview model (pro)
- try:
-     llm_endeavor = Llama.from_pretrained(
-         repo_id="ranggafermata/Endeavor-1",
-         filename="EndeavorPreview-q4.gguf",
-         n_ctx=2048,
-         n_gpu_layers=-1,
-         verbose=False,
-         chat_format="llama-3"
-     )
-     print("Endeavor 1 (preview) model loaded successfully.")
- except Exception as e:
-     print(f"Error loading Endeavor 1 model: {e}")
-     llm_endeavor = None
-
- try:
-     # Initialize the Tavily client instance
-     tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
-     print("Tavily client initialized successfully.")
- except Exception as e:
-     print(f"Error initializing Tavily client: {e}")
-
- @app.route("/research", methods=["POST"])
- def research():
-
-     global tavily_client
-
-     if not tavily_client:
-         return jsonify({"error": "Tavily client not available"}), 500
-
-     data = request.get_json()
-     task = data.get("task")
-     query = data.get("query")
-
-     if not task or not query:
-         return jsonify({"error": "Missing task or query"}), 400
-
-     try:
-         if task == 'search':
-             results = tavily_client.search(query=query, search_depth="advanced")
-         elif task == 'extract':
-             results = tavily_client.extract(urls=[query])
-         else:
-             return jsonify({"error": "Invalid task"}), 400
-
-         return jsonify(results)
-
-     except Exception as e:
-         print(f"Error during Tavily research: {e}")
-         tavily_client = None
-
-         return jsonify({"error": str(e)}), 500
-
-
- # --- Main Endpoint ---
- @app.route("/completion", methods=["POST"])
- def completion():
-     prompt = request.form.get("prompt", "")
-     history_json = request.form.get("history", "[]")
-     image_file = request.files.get("image")
-     model_choice = request.form.get("model", "effort")
-
-
-     pil_image = None
-     if image_file:
-         try:
-             pil_image = Image.open(image_file.stream).convert("RGB")
-         except Exception as e:
-             print(f"Error opening image file: {e}")
-             pil_image = None
-
-     try:
-         chat_history = json.loads(history_json)
-     except json.JSONDecodeError:
-         chat_history = []
-
-     def generate_stream(user_prompt, image_obj, history, model_choice_inner):
-         if image_obj:
-             if vision_model:
-                 try:
-                     img = pil_image.resize((384, 384))  # Resize to match model input size
-                     inputs = (
-                         vision_processor(images=img, text=user_prompt, return_tensors="pt").to(device)
-                         if user_prompt else vision_processor(images=img, return_tensors="pt").to(device)
-                     )
-                     output = vision_model.generate(**inputs, max_new_tokens=50)
-                     caption = vision_processor.decode(output[0], skip_special_tokens=True).strip()
-                     yield f"data: {json.dumps({'content': caption})}\n\n"
-                 except Exception as e:
-                     print(f"Error processing image: {e}")
-                     yield f"data: {json.dumps({'content': 'Sorry, I had trouble reading that image.'})}\n\n"
-             else:
-                 yield f"data: {json.dumps({'content': 'Vision model not available.'})}\n\n"
-
-         else:
-             # Choose model instance
-             if model_choice_inner == "endeavor" and llm_endeavor:
-                 llm_to_use = llm_endeavor
-             else:
-                 llm_to_use = llm
-
-             if llm_to_use:
-                 try:
-                     # Different system prompts for Effort vs Endeavor
-                     if llm_to_use is llm_endeavor:
-                         system_message = {
-                             "role": "system",
-                             "content": (
-                                 "You are Endeavor — a professional-grade AI assistant. "
-                                 "Provide concise, accurate, and context-aware answers. Prioritize technical precision, "
-                                 "clarity, and safety. When asked for code, include runnable examples and brief explanations. "
-                                 "When unsure, state limitations and offer next steps or references."
-                             )
-                         }
-                     else:
-                         system_message = {
-                             "role": "system",
-                             "content": "You are a helpful and brilliant AI assistant named Effort."
-                         }
-
-                     messages = [system_message] + history + [{"role": "user", "content": user_prompt}]
-
-                     print(f"Sending {len(messages)} messages to the model ({'endeavor' if llm_to_use is llm_endeavor else 'effort'}).")
-
-                     stream = llm_to_use.create_chat_completion(
-                         messages=messages,
-                         max_tokens=1024,
-                         temperature=0.7,
-                         stream=True
-                     )
-
-                     for output in stream:
-                         token = output["choices"][0]["delta"].get("content", "")
-                         if token:
-                             yield f"data: {json.dumps({'content': token})}\n\n"
-
-                 except Exception as e:
-                     print(f"Error during text generation: {e}")
-                     yield f"data: {json.dumps({'content': 'I encountered an error.'})}\n\n"
-             else:
-                 yield f"data: {json.dumps({'content': 'Requested text model not available.'})}\n\n"
-
-     return Response(generate_stream(prompt, image_file, chat_history, model_choice), mimetype="text-event-stream")
-
-
- if __name__ == "__main__":
+ from flask import Flask, request, Response, jsonify
+ from flask_cors import CORS
+ from PIL import Image
+ import torch
+ from transformers import AutoProcessor, BlipForConditionalGeneration
+ from llama_cpp import Llama
+ import json
+ from tavily import TavilyClient
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
+
+ app = Flask(__name__)
+ CORS(app, resources={r"/completion": {"origins": ["https://bangorinas.com", "https://www.bangorinas.com"]},
+                      r"/research": {"origins": ["https://bangorinas.com", "https://www.bangorinas.com"]}})
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Using device: {device}")
+
+ # --- Load Models ---
+ try:
+     vision_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+     vision_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
+     print("BLIP Vision model loaded successfully.")
+ except Exception as e:
+     print(f"Error loading Vision model: {e}")
+     vision_model = None
+
+ try:
+     llm = Llama.from_pretrained(
+         repo_id="ranggafermata/Effort-1",
+         filename="EffortQ43B.gguf",
+         n_ctx=2048,
+         n_gpu_layers=-1,
+         verbose=False,
+         chat_format="llama-3"  # Use the standard Llama 3 chat format
+     )
+     print("Effort 1 model loaded successfully.")
+ except Exception as e:
+     print(f"Error loading Effort 1 model: {e}")
+     llm = None
+
+ # Load Endeavor preview model (pro)
+ try:
+     llm_endeavor = Llama.from_pretrained(
+         repo_id="ranggafermata/Endeavor-1",
+         filename="EndeavorPreview-q4.gguf",
+         n_ctx=2048,
+         n_gpu_layers=-1,
+         verbose=False,
+         chat_format="llama-3"
+     )
+     print("Endeavor 1 (preview) model loaded successfully.")
+ except Exception as e:
+     print(f"Error loading Endeavor 1 model: {e}")
+     llm_endeavor = None
+
+ try:
+     # Initialize the Tavily client instance
+     tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
+     print("Tavily client initialized successfully.")
+ except Exception as e:
+     print(f"Error initializing Tavily client: {e}")
+
+ @app.route("/research", methods=["POST"])
+ def research():
+
+     global tavily_client
+
+     if not tavily_client:
+         return jsonify({"error": "Tavily client not available"}), 500
+
+     data = request.get_json()
+     task = data.get("task")
+     query = data.get("query")
+
+     if not task or not query:
+         return jsonify({"error": "Missing task or query"}), 400
+
+     try:
+         if task == 'search':
+             results = tavily_client.search(query=query, search_depth="advanced")
+         elif task == 'extract':
+             results = tavily_client.extract(urls=[query])
+         else:
+             return jsonify({"error": "Invalid task"}), 400
+
+         return jsonify(results)
+
+     except Exception as e:
+         print(f"Error during Tavily research: {e}")
+         tavily_client = None
+
+         return jsonify({"error": str(e)}), 500
+
+
+ # --- Main Endpoint ---
+ @app.route("/completion", methods=["POST"])
+ def completion():
+     prompt = request.form.get("prompt", "")
+     history_json = request.form.get("history", "[]")
+     image_file = request.files.get("image")
+     model_choice = request.form.get("model", "effort")
+
+
+     pil_image = None
+     if image_file:
+         try:
+             pil_image = Image.open(image_file.stream).convert("RGB")
+         except Exception as e:
+             print(f"Error opening image file: {e}")
+             pil_image = None
+
+     try:
+         chat_history = json.loads(history_json)
+     except json.JSONDecodeError:
+         chat_history = []
+
+     def generate_stream(user_prompt, image_obj, history, model_choice_inner):
+         if image_obj:
+             if vision_model:
+                 try:
+                     img = image_obj.resize((384, 384))  # Resize to match model input size
+                     inputs = (
+                         vision_processor(images=img, text=user_prompt, return_tensors="pt").to(device)
+                         if user_prompt else vision_processor(images=img, return_tensors="pt").to(device)
+                     )
+                     output = vision_model.generate(**inputs, max_new_tokens=50)
+                     caption = vision_processor.decode(output[0], skip_special_tokens=True).strip()
+                     yield f"data: {json.dumps({'content': caption})}\n\n"
+                 except Exception as e:
+                     print(f"Error processing image: {e}")
+                     yield f"data: {json.dumps({'content': 'Sorry, I had trouble reading that image.'})}\n\n"
+             else:
+                 yield f"data: {json.dumps({'content': 'Vision model not available.'})}\n\n"
+
+         else:
+             # Choose model instance
+             if model_choice_inner == "endeavor" and llm_endeavor:
+                 llm_to_use = llm_endeavor
+             else:
+                 llm_to_use = llm
+
+             if llm_to_use:
+                 try:
+                     # Different system prompts for Effort vs Endeavor
+                     if llm_to_use is llm_endeavor:
+                         system_message = {
+                             "role": "system",
+                             "content": (
+                                 "You are Endeavor, a professional-grade AI assistant. "
+                                 "Provide concise, accurate, and context-aware answers. Prioritize technical precision, "
+                                 "clarity, and safety. When asked for code, include runnable examples and brief explanations. "
+                                 "When unsure, state limitations and offer next steps or references."
+                             )
+                         }
+                     else:
+                         system_message = {
+                             "role": "system",
+                             "content": "You are a helpful and brilliant AI assistant named Effort."
+                         }
+
+                     messages = [system_message] + history + [{"role": "user", "content": user_prompt}]
+
+                     print(f"Sending {len(messages)} messages to the model ({'endeavor' if llm_to_use is llm_endeavor else 'effort'}).")
+
+                     stream = llm_to_use.create_chat_completion(
+                         messages=messages,
+                         max_tokens=1024,
+                         temperature=0.7,
+                         stream=True
+                     )
+
+                     for output in stream:
+                         token = output["choices"][0]["delta"].get("content", "")
+                         if token:
+                             yield f"data: {json.dumps({'content': token})}\n\n"
+
+                 except Exception as e:
+                     print(f"Error during text generation: {e}")
+                     yield f"data: {json.dumps({'content': 'I encountered an error.'})}\n\n"
+             else:
+                 yield f"data: {json.dumps({'content': 'Requested text model not available.'})}\n\n"
+
+     return Response(generate_stream(prompt, pil_image, chat_history, model_choice), mimetype="text/event-stream")
+
+
+ if __name__ == "__main__":
      app.run(host="0.0.0.0", port=8080)