Commit
·
44fb3b3
1
Parent(s):
da05e38
Improve the prompt: replace the single-line prompt_template with a structured multi-line extraction prompt
Browse files
app.py
CHANGED
|
@@ -42,7 +42,20 @@ def webpage_to_json(content: str, is_url: bool, schema_name: str) -> Dict[str, A
|
|
| 42 |
return {"error": f"Invalid schema name: {schema_name}. Choose from: {', '.join(SCHEMA_OPTIONS.keys())}"}
|
| 43 |
|
| 44 |
schema = SCHEMA_OPTIONS[schema_name]
|
| 45 |
-
prompt_template = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# Initialize pipeline components
|
| 48 |
preprocessor = BasicPreprocessor(config={'keep_tags': False})
|
|
|
|
| 42 |
return {"error": f"Invalid schema name: {schema_name}. Choose from: {', '.join(SCHEMA_OPTIONS.keys())}"}
|
| 43 |
|
| 44 |
schema = SCHEMA_OPTIONS[schema_name]
|
| 45 |
+
prompt_template = """Extract the following information from the provided content according to the specified schema.
|
| 46 |
+
|
| 47 |
+
Content to analyze:
|
| 48 |
+
{content}
|
| 49 |
+
|
| 50 |
+
Schema requirements:
|
| 51 |
+
{schema}
|
| 52 |
+
|
| 53 |
+
Instructions:
|
| 54 |
+
- Extract only information that is explicitly present in the content
|
| 55 |
+
- Follow the exact structure and data types specified in the schema
|
| 56 |
+
- If a required field cannot be found, indicate this clearly
|
| 57 |
+
- Preserve the original formatting and context where relevant
|
| 58 |
+
- Return the extracted data in the format specified by the schema"""
|
| 59 |
|
| 60 |
# Initialize pipeline components
|
| 61 |
preprocessor = BasicPreprocessor(config={'keep_tags': False})
|