| import jsonlines |
|
|
| |
# JSON Lines file that the script reads, one record at a time.
input_file = "data/thirdStep_file.jsonl"

# Destination the converted records are written to through jsonlines.
output_file = "data/train4465"

# A label is accepted only when its score is strictly greater than this value.
threshold = 0.5

# Options attached to every output record. Each entry carries an "id", a
# display "text" and a "meta" string; "meta" is the fixed value "0.00" here.
options = [
    {"id": "CapitalRequirements", "text": "Capital Requirements", "meta": "0.00"},
    {"id": "ConsumerProtection", "text": "Consumer Protection", "meta": "0.00"},
    {"id": "RiskManagement", "text": "Risk Management", "meta": "0.00"},
    {"id": "ReportingAndCompliance", "text": "Reporting And Compliance", "meta": "0.00"},
    {"id": "CorporateGovernance", "text": "Corporate Governance", "meta": "0.00"},
]
|
|
| |
def process_record(record, *, min_score=None, choices=None):
    """Build the output record for one input prediction record.

    Args:
        record: Mapping with a "text" entry and a "predicted_labels" entry.
            "predicted_labels" maps each label to a score that is compared
            against the cutoff.
        min_score: Optional cutoff; labels whose score is strictly greater
            than it are accepted. When omitted, the module-level
            ``threshold`` is used.
        choices: Optional iterable of option dicts, each with "id", "text"
            and "meta" keys. When omitted, the module-level ``options`` list
            is used.

    Returns:
        A dict with the keys "text", "cats" (the scores as given), "accept"
        (labels above the cutoff), "answer" ("accept" when at least one label
        was accepted, otherwise "reject") and "options".

    Raises:
        KeyError: If ``record`` lacks "text" or "predicted_labels", or an
            option lacks "id", "text" or "meta".
    """
    text = record["text"]
    predicted_labels = record["predicted_labels"]

    # Fall back to the module-level configuration only when no override is
    # supplied, so existing one-argument calls behave as before.
    cutoff = threshold if min_score is None else min_score
    available = options if choices is None else choices

    accepted_categories = [
        label for label, score in predicted_labels.items() if score > cutoff
    ]

    return {
        "text": text,
        "cats": predicted_labels,
        "accept": accepted_categories,
        "answer": "accept" if accepted_categories else "reject",
        # Each record gets its own option dicts (restricted to id/text/meta)
        # so the shared option list is never aliased into the output.
        # NOTE(review): "meta" is copied through unchanged; it is not replaced
        # by the label's score. Confirm that this is intended.
        "options": [
            {"id": option["id"], "text": option["text"], "meta": option["meta"]}
            for option in available
        ],
    }
|
|
| |
# Stream the input file record by record, convert each record with
# process_record and write the result to the output file. Both files are
# opened in one `with` statement so they are closed even if a record fails.
with jsonlines.open(input_file, "r") as infile, jsonlines.open(output_file, "w") as outfile:
    for record in infile:
        outfile.write(process_record(record))
|
|