Spaces:
Sleeping
Sleeping
| Repository Documentation | |
| This document provides a comprehensive overview of the repository's structure and contents. | |
| The first section, titled 'Directory/File Tree', displays the repository's hierarchy in a tree format. | |
| In this section, directories and files are listed using tree branches to indicate their structure and relationships. | |
| Following the tree representation, the 'File Content' section details the contents of each file in the repository. | |
| Each file's content is introduced with a '[File Begins]' marker followed by the file's relative path, | |
| and the content is displayed verbatim. The end of each file's content is marked with a '[File Ends]' marker. | |
| This format ensures a clear and orderly presentation of both the structure and the detailed contents of the repository. | |
| Directory/File Tree Begins --> | |
| / | |
| ├── README.md | |
| ├── __pycache__ | |
| ├── app.py | |
| ├── database-structure.txt | |
| ├── gematria.py | |
| ├── requirements.txt | |
| ├── texts | |
| └── util.py | |
| <-- Directory/File Tree Ends | |
| File Content Begin --> | |
| [File Begins] README.md | |
| --- | |
| title: Tanach Network | |
| emoji: 📊 | |
| colorFrom: green | |
| colorTo: pink | |
| sdk: gradio | |
| sdk_version: 4.39.0 | |
| app_file: app.py | |
| pinned: false | |
| --- | |
| Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference | |
| [File Ends] README.md | |
| [File Begins] app.py | |
| import gradio as gr | |
| import json | |
| import re | |
| import sqlite3 | |
| import logging | |
| from collections import defaultdict | |
| from typing import Tuple, Dict, List | |
| # Assuming you have these files in your project | |
| from util import process_json_files | |
| from gematria import calculate_gematria | |
| from deep_translator import GoogleTranslator, exceptions | |
| from urllib.parse import quote_plus | |
| from tqdm import tqdm | |
# --- Configuration ----------------------------------------------------------
DATABASE_FILE = "gematria.db"  # SQLite file used by every database helper below
MAX_PHRASE_LENGTH_LIMIT = 20  # upper bound for the phrase length slider and for indexing
BATCH_SIZE = 10000  # NOTE(review): not referenced by any code visible in this file

# --- Logging ----------------------------------------------------------------
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# --- Module-level state -----------------------------------------------------
# `conn` and `translator` start out as None; initialize_database() and
# initialize_translator() assign the real objects.
conn: sqlite3.Connection = None
translator: GoogleTranslator = None
book_names: Dict[int, str] = {}  # book id -> title, filled in by process_book()
gematria_cache: Dict[Tuple[int, int], List[Tuple[str, str, int, int, int, str]]] = {}  # (gematria sum, max words) -> rows
translation_cache: Dict[str, str] = {}  # Hebrew phrase -> English translation
total_word_count: int = 0  # Global counter for word position
def initialize_database() -> None:
    """Open the SQLite database and make sure the schema exists.

    The new connection is stored in the module-level ``conn``. The
    ``results`` table, its index on ``gematria_sum`` and the
    ``processed_books`` bookkeeping table are created when missing, and the
    changes are committed.
    """
    global conn

    schema_statements = (
        '''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            phrase_length INTEGER,
            word_position TEXT,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
        )
        ''',
        '''
        CREATE INDEX IF NOT EXISTS idx_results_gematria
        ON results (gematria_sum)
        ''',
        '''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
        ''',
    )

    conn = sqlite3.connect(DATABASE_FILE)
    db_cursor = conn.cursor()
    for statement in schema_statements:
        db_cursor.execute(statement)
    conn.commit()
def initialize_translator() -> None:
    """Create the shared translator (source ``'iw'``, target ``'en'``).

    The instance is stored in the module-level ``translator``.
    """
    global translator
    translator = GoogleTranslator(target='en', source='iw')
    logging.info("Translator initialized.")
def _verse_words(verse) -> List[str]:
    """Return the Hebrew words of one verse, without annotations or non-Hebrew characters."""
    verse_text = flatten_text(verse)
    verse_text = re.sub(r'\[.*?\]', '', verse_text)  # drop bracketed notes
    verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)  # keep only U+05D0..U+05EA and spaces
    # str.split() already collapses runs of spaces, so no separate squeeze step is needed.
    return verse_text.split()


def process_book(book_id: int, max_phrase_length: int, cursor):
    """Build the ``results`` rows that still have to be inserted for one book.

    Args:
        book_id: Numeric id of the book; passed to ``process_json_files``.
        max_phrase_length: Longest phrase (in words) to generate.
        cursor: Open database cursor used to read ``processed_books``.

    Returns:
        A list of tuples ``(gematria_sum, words, None, title, chapter, verse,
        phrase_length, word_position)``. The list is empty when the book
        cannot be loaded, lacks a ``title``/``text`` field, or is already
        stored up to ``max_phrase_length``.

    Side effects:
        Records the title in ``book_names`` and advances the module-level
        ``total_word_count`` by the number of words in the book. The counter
        is advanced even when the book is already stored, because the
        positions of every following book are offsets into the running
        total. Returning early without counting would shift them.
    """
    global book_names, total_word_count

    phrases_to_insert = []
    book_data = process_json_files(book_id, book_id).get(book_id)
    if book_data is None:
        return phrases_to_insert

    title = book_data.get('title')
    if not isinstance(title, str):
        logging.warning(f"Skipping book {book_id} due to missing 'title' field.")
        return phrases_to_insert
    book_names[book_id] = title

    # Check how far this book has already been processed.
    cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (title,))
    row = cursor.fetchone()
    stored_length = row[0] if row else None  # may also be None for a NULL column
    if stored_length is not None and stored_length >= max_phrase_length:
        logging.info(f"Skipping book {title}: Already processed with max_phrase_length {stored_length}")

    chapters = book_data.get('text')
    if not isinstance(chapters, list):
        logging.warning(f"Skipping book {book_id} due to missing 'text' field.")
        return phrases_to_insert

    # Lengths up to stored_length are already in the table; regenerating them
    # would only produce rows that INSERT OR IGNORE discards again. When the
    # book is fully processed this range is empty and only words are counted.
    first_new_length = (stored_length or 0) + 1

    for chapter_number, chapter in enumerate(chapters, start=1):
        for verse_number, verse in enumerate(chapter, start=1):
            words = _verse_words(verse)
            for length in range(first_new_length, max_phrase_length + 1):
                for start in range(len(words) - length + 1):
                    phrase_words = words[start:start + length]
                    gematria_sum = calculate_gematria("".join(phrase_words))
                    first_position = total_word_count + start + 1
                    word_position_range = f"{first_position}-{first_position + length - 1}"
                    phrases_to_insert.append(
                        (gematria_sum, " ".join(phrase_words), None, title,
                         chapter_number, verse_number, length, word_position_range))
            total_word_count += len(words)
    return phrases_to_insert
def populate_database(start_book: int, end_book: int, max_phrase_length: int = 1) -> None:
    """Insert the phrases of books ``start_book``..``end_book`` into the database.

    For every book that yields rows from ``process_book``, the rows are
    inserted with ``INSERT OR IGNORE``, the book is recorded in
    ``processed_books`` with ``max_phrase_length``, and the transaction is
    committed.

    Args:
        start_book: First book id (inclusive).
        end_book: Last book id (inclusive).
        max_phrase_length: Longest phrase (in words) to generate.

    The function uses its own connection and closes it before returning.
    ``sqlite3``'s connection context manager only commits or rolls back, so
    the previous ``with sqlite3.connect(...) as conn`` left one open
    connection behind per call and rebound the module-level ``conn``.
    ``total_word_count`` is reset on every exit path so a failed run cannot
    offset the positions of the next one.
    """
    global book_names, total_word_count

    logging.info(f"Populating database with books from {start_book} to {end_book}...")
    connection = sqlite3.connect(DATABASE_FILE)
    try:
        cursor = connection.cursor()
        for book_id in tqdm(range(start_book, end_book + 1), desc="Processing Books"):
            phrases_to_insert = process_book(book_id, max_phrase_length, cursor)
            if not phrases_to_insert:
                continue
            cursor.executemany('''
                INSERT OR IGNORE INTO results (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            ''', phrases_to_insert)
            # Update processed_books after processing each book
            cursor.execute('''
                INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                VALUES (?, ?)
            ''', (book_names[book_id], max_phrase_length))
            connection.commit()
    finally:
        # Closing without commit discards a half-written book, like the
        # rollback the context manager performed on an exception.
        connection.close()
        total_word_count = 0  # Reset for the next set of phrase lengths
def get_translation(phrase: str) -> str:
    """Return the English translation of a Hebrew phrase, caching the result.

    Lookup order: the in-memory ``translation_cache``, then the
    ``translation`` column of ``results``, then ``translate_and_store``.
    A fresh translation is written to the cache and to every ``results`` row
    with the same ``words``.

    The "[Translation Error]" placeholder that ``translate_and_store``
    returns on failure is passed on to the caller but not cached or
    persisted, so a later call can try again.

    Args:
        phrase: The Hebrew phrase exactly as stored in ``results.words``.

    Returns:
        The translation, or the placeholder when translating failed.
    """
    global translation_cache

    cached = translation_cache.get(phrase)
    if cached:
        return cached

    # Look for any row of this phrase that already carries a translation.
    connection = sqlite3.connect(DATABASE_FILE)
    try:
        row = connection.execute(
            "SELECT translation FROM results WHERE words = ? AND translation IS NOT NULL LIMIT 1",
            (phrase,),
        ).fetchone()
    finally:
        connection.close()  # the sqlite3 context manager would not close it
    if row and row[0]:
        translation_cache[phrase] = row[0]
        return row[0]

    # No connection is held open while the translation request is in flight.
    translation = translate_and_store(phrase)
    if not translation or translation == "[Translation Error]":
        return translation

    translation_cache[phrase] = translation
    connection = sqlite3.connect(DATABASE_FILE)
    try:
        connection.execute("UPDATE results SET translation = ? WHERE words = ?", (translation, phrase))
        connection.commit()
    finally:
        connection.close()
    return translation
def translate_and_store(phrase: str) -> str:
    """Translate a Hebrew phrase with the shared translator, trying up to three times.

    Each failed attempt that raises one of the handled translator exceptions
    is logged as a warning. After the last failure an error is logged and
    the placeholder ``"[Translation Error]"`` is returned. Despite its name,
    this function does not store anything itself.
    """
    global translator

    max_retries = 3
    for attempt in range(1, max_retries + 1):
        try:
            return translator.translate(phrase)
        except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
                exceptions.ServerException, exceptions.RequestError) as error:
            logging.warning(f"Error translating phrase '{phrase}': {error}. Retrying... ({attempt}/{max_retries})")

    logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
    return "[Translation Error]"
def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str, str, int, int, int, str]]:
    """Return every stored phrase with the given gematria value.

    Args:
        gematria_sum: Gematria value to look up.
        max_words: Upper bound (inclusive) for ``phrase_length``.

    Returns:
        Rows of ``(words, book, chapter, verse, phrase_length, word_position)``.

    The connection is local and closed explicitly. ``with sqlite3.connect()``
    only manages the transaction, so the previous code left a connection open
    on every search and rebound the module-level ``conn``.
    """
    connection = sqlite3.connect(DATABASE_FILE)
    try:
        cursor = connection.cursor()
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND phrase_length <= ?
        ''', (gematria_sum, max_words))
        return cursor.fetchall()
    finally:
        connection.close()
# Style modified to position search on top and results below
_RESULTS_STYLE = """
<style>
.results-container {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
    gap: 20px;
    width: 100%; /* Make results container take full width */
}
.result-item {
    border: 1px solid #ccc;
    padding: 15px;
    border-radius: 5px;
    box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
}
.hebrew-phrase {
    font-family: 'SBL Hebrew', 'Ezra SIL', serif;
    direction: rtl;
}
.bible-link {
    display: block;
    margin-top: 10px;
    color: #007bff;
    text-decoration: none;
}
</style>
"""


def _book_order_lookup() -> Dict[str, float]:
    """Map each known book title to its book id; the first id wins for duplicate titles."""
    order = {}
    for book_id, title in book_names.items():
        order.setdefault(title, int(book_id))
    return order


def _render_result_item(book, chapter, verse, words, translation, phrase_length,
                        word_position, phrase_gematria=None) -> str:
    """Render one result card as HTML, escaping every interpolated text value."""
    from html import escape  # local import: the module header does not import html

    link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
    lines = [
        "",
        "<div class='result-item'>",
        f"<p><b>Book:</b> {escape(str(book))}</p>",
        f"<p><b>Chapter:</b> {chapter}, <b>Verse:</b> {verse}</p>",
        f"<p class='hebrew-phrase'><b>Hebrew Phrase:</b> {escape(str(words))}</p>",
        f"<p><b>Translation:</b> {escape(str(translation))}</p>",
        f"<p><b>Phrase Length:</b> {phrase_length} words</p>",
    ]
    if phrase_gematria is not None:
        lines.append(f"<p><b>Phrase Gematria:</b> {phrase_gematria}</p>")
    lines.extend([
        f"<p><b>Word Position in the Tanach:</b> {escape(str(word_position))}</p>",
        f"<a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>",
        "</div>",
        "",
    ])
    return "\n".join(lines)


def gematria_search_interface(phrases: str, max_words: int, show_translation: bool) -> str:
    """Handle one Gradio search request and return the result page as HTML.

    Each non-empty input line is a query. Its gematria value is the gematria
    of the text plus the sum of any digit groups on the line. For every query
    the matching stored phrases are listed by book, chapter and verse,
    followed by the query's average word position. With two or more
    successful queries, a "Middle Words" section shows, for every book
    common to all of them, the word(s) at the mean of the per-query averages.

    Args:
        phrases: Raw textbox content, one query per line.
        max_words: Maximum phrase length (in words) of reported matches.
        show_translation: Whether to fetch and show English translations.

    Returns:
        An HTML string (style block plus results), or a plain prompt when no
        query was entered.

    User input, stored phrases and translations are HTML-escaped before
    interpolation. Books whose title is not in ``book_names`` sort last
    instead of raising ``ValueError``.
    """
    global book_names, gematria_cache
    from html import escape  # local import: the module header does not import html

    results = []
    all_results = []  # one {book: rows} mapping per successful query
    middle_words_results = []  # (book, row) pairs for the common-books section
    average_position_sum = 0
    total_name_count = 0

    input_lines = (phrases or "").strip().splitlines()
    if not input_lines:
        return "Please enter at least one phrase."

    # Built once per request instead of scanning book_names for every sort key.
    book_order = _book_order_lookup()
    unknown_book = float("inf")

    for phrase in input_lines:
        if not phrase.strip():
            continue  # Skip empty lines

        numbers = re.findall(r'\d+', phrase)
        text_without_numbers = re.sub(r'\d+', '', phrase)
        phrase_gematria = calculate_gematria(text_without_numbers.replace(" ", ""))
        phrase_gematria += sum(int(number) for number in numbers)

        cache_key = (phrase_gematria, max_words)
        if cache_key in gematria_cache:
            matching_phrases = gematria_cache[cache_key]
        else:
            matching_phrases = search_gematria_in_db(phrase_gematria, max_words)
            gematria_cache[cache_key] = matching_phrases

        safe_phrase = escape(phrase)
        if not matching_phrases:
            results.append(f"No matching phrases found for: {safe_phrase}")
            continue

        sorted_phrases = sorted(
            matching_phrases,
            key=lambda row: (book_order.get(row[1], unknown_book), row[2], row[3]))
        results_by_book = defaultdict(list)
        for words, book, chapter, verse, phrase_length, word_position in sorted_phrases:
            results_by_book[book].append((words, chapter, verse, phrase_length, word_position))

        results.append(f"<h2>Results for: {safe_phrase} (Gematria: {phrase_gematria})</h2>")
        results.append("<div class='results-container'>")
        for book, book_rows in results_by_book.items():
            for words, chapter, verse, phrase_length, word_position in book_rows:
                translation = get_translation(words) if show_translation else ""
                results.append(_render_result_item(
                    book, chapter, verse, words, translation, phrase_length,
                    word_position, phrase_gematria=phrase_gematria))

        # Calculate average position for the current name across all books
        name_average_position = calculate_average_position_for_name(results_by_book)
        if name_average_position is not None:
            results.append(
                f"<p><b>Average Word Position for '{safe_phrase}' across all books:</b> "
                f"{name_average_position:.2f}</p>")
            average_position_sum += name_average_position
            total_name_count += 1
        results.append("</div>")
        all_results.append(results_by_book)

    # Calculate the average word position across all names and all their books
    if total_name_count > 0:
        all_names_average_position = average_position_sum / total_name_count
        results.append(
            f"<h2>Average Word Position Across All Names and Books: {all_names_average_position:.2f}</h2>")

    # Calculate middle words for all input lines (common books)
    if len(all_results) >= 2:
        results.append("<h2>Middle Words (Common Books):</h2>")
        results.append("<div class='results-container'>")
        common_books = set.intersection(*(set(by_book) for by_book in all_results))
        logging.debug(f"Common books: {common_books}")

        for book in common_books:
            logging.debug(f"Processing book: {book}")
            nearest_positions = find_nearest_positions([by_book[book] for by_book in all_results])
            logging.debug(f"Nearest positions in {book}: {nearest_positions}")
            if not nearest_positions:
                continue

            middle_word_position = sum(nearest_positions) / len(nearest_positions)
            logging.debug(f"Calculated middle word position in {book}: {middle_word_position}")
            start_position = int(middle_word_position)
            # A fractional midpoint lies between two words, so look at both.
            end_position = start_position + 1 if middle_word_position % 1 != 0 else start_position
            logging.debug(f"Middle word position range in {book}: {start_position}-{end_position}")

            middle_words_data = get_words_from_db(book, start_position, end_position)
            logging.debug(f"Middle words data fetched from database: {middle_words_data}")
            if not middle_words_data and end_position != start_position:
                # No phrase spans exactly start-end: fall back to the two
                # single words. When start == end the query above already
                # was the single-word lookup, so it is not repeated.
                logging.debug(f"No middle words found for range {start_position}-{end_position}. "
                              f"Fetching words independently.")
                middle_words_data = (get_words_from_db(book, start_position, start_position)
                                     + get_words_from_db(book, end_position, end_position))
            middle_words_results.extend((book, data) for data in middle_words_data)

        # Sort middle words results by book order before displaying
        middle_words_results.sort(key=lambda item: book_order.get(item[0], unknown_book))
        for book, (words, chapter, verse, phrase_length, word_position) in middle_words_results:
            translation = get_translation(words) if show_translation else ""
            results.append(_render_result_item(
                book, chapter, verse, words, translation, phrase_length, word_position))
        results.append("</div>")

    return _RESULTS_STYLE + "\n".join(results)
def calculate_average_position_for_name(results_by_book: Dict[str, List[Tuple]]) -> float:
    """Return the mean midpoint of all ``"start-end"`` word positions.

    Every entry of every book contributes ``(start + end) / 2``. Returns
    ``None`` when there are no entries at all.
    """
    midpoints = []
    for book_phrases in results_by_book.values():
        for _words, _chapter, _verse, _length, word_position in book_phrases:
            first, last = (int(part) for part in word_position.split('-'))
            midpoints.append((first + last) / 2)

    if not midpoints:
        return None
    return sum(midpoints) / len(midpoints)
def find_nearest_positions(results_lists: List[List]) -> List[int]:
    """Return one average word position per non-empty result list.

    For each list, the midpoint of every ``"start-end"`` position is taken
    and the midpoints are averaged. Empty lists contribute nothing to the
    output. The values are logged at debug level.
    """
    averages = []
    for number, rows in enumerate(results_lists, start=1):
        midpoints = []
        for _words, _chapter, _verse, _length, span in rows:
            bounds = span.split('-')
            midpoints.append((int(bounds[0]) + int(bounds[1])) / 2)  # Get average of start and end positions
        logging.debug(f"Positions for phrase {number}: {midpoints}")

        # Calculate the average position for the current phrase
        mean_position = sum(midpoints) / len(midpoints) if midpoints else None
        logging.debug(f"Average position for phrase {number}: {mean_position}")
        if mean_position is None:
            continue
        averages.append(mean_position)
    return averages
def get_words_from_db(book: str, start_position: int, end_position: int) -> List[Tuple]:
    """Fetch the stored phrases of a book whose position is exactly ``"start-end"``.

    Args:
        book: Book title as stored in ``results.book``.
        start_position: First word position of the phrase.
        end_position: Last word position of the phrase.

    Returns:
        Rows of ``(words, chapter, verse, phrase_length, word_position)``;
        empty when no phrase has exactly that position range.

    The connection is local and closed explicitly. ``with sqlite3.connect()``
    only manages the transaction, so the previous code left a connection open
    on every call and rebound the module-level ``conn``.
    """
    logging.debug(f"Fetching words from database for {book} at positions {start_position}-{end_position}")
    connection = sqlite3.connect(DATABASE_FILE)
    try:
        cursor = connection.cursor()
        cursor.execute("""
            SELECT words, chapter, verse, phrase_length, word_position
            FROM results
            WHERE book = ? AND word_position = ?
        """, (book, f"{start_position}-{end_position}"))  # Directly compare word_position
        results = cursor.fetchall()
    finally:
        connection.close()
    logging.debug(f"Words fetched from database: {results}")
    return results
def flatten_text(text: List) -> str:
    """Join an arbitrarily nested list of strings into one space-separated string.

    A non-list argument is returned unchanged.
    """
    if not isinstance(text, list):
        return text

    pieces = []
    for element in text:
        if isinstance(element, list):
            pieces.append(flatten_text(element))
        else:
            pieces.append(element)
    return " ".join(pieces)
def run_app() -> None:
    """Prepare the database and translator, index the texts, and launch the Gradio UI.

    The database is populated in a single pass with
    ``MAX_PHRASE_LENGTH_LIMIT``. ``process_book`` generates every phrase
    length from 1 up to the requested maximum, so calling
    ``populate_database`` once per length (1, 2, ..., limit) rebuilt the
    shorter phrases again on every pass only for ``INSERT OR IGNORE`` to
    discard them. One pass yields the same rows.
    """
    initialize_database()
    initialize_translator()

    logging.info("Starting database population...")
    populate_database(1, 39, max_phrase_length=MAX_PHRASE_LENGTH_LIMIT)
    logging.info("Database population complete.")

    with gr.Blocks() as iface:  # Use gr.Blocks() for layout control
        with gr.Row():  # Place inputs in a row
            textbox = gr.Textbox(label="Enter word(s) or numbers (one phrase per line)", lines=5)
            slider = gr.Slider(label="Max Word Count in Result Phrases", minimum=1,
                               maximum=MAX_PHRASE_LENGTH_LIMIT, step=1,
                               value=1)
            checkbox = gr.Checkbox(label="Show Translation", value=True)
        with gr.Row():  # Place buttons in a row
            clear_button = gr.Button("Clear")
            submit_button = gr.Button("Submit", variant="primary")
        html_output = gr.HTML(label="Results")  # Output for the results

        submit_button.click(fn=gematria_search_interface,
                            inputs=[textbox, slider, checkbox],
                            outputs=html_output)
        clear_button.click(fn=lambda: "", inputs=None, outputs=html_output)  # Clear the output
    iface.launch()


if __name__ == "__main__":
    run_app()
| [File Ends] app.py | |
| [File Begins] database-structure.txt | |
| Gematria Sum, Words, Translation, Book, Chapter, Verse, Phrase Length, Phrase Position | |
| 913 בראשית Genesis 1 1 1 1-1 | |
| 1116 בראשית ברא Genesis 1 1 2 1-2 | |
| 1762 בראשית ברא אלהים Genesis 1 1 3 1-3 | |
| 2163 בראשית ברא אלהים את Genesis 1 1 4 1-4 | |
| 3118 בראשית ברא אלהים את השמים Genesis 1 1 5 1-5 | |
| 3525 בראשית ברא אלהים את השמים ואת Genesis 1 1 6 1-6 | |
| [File Ends] database-structure.txt | |
| [File Begins] gematria.py | |
| import unicodedata | |
def strip_diacritics(text):
    """
    Remove diacritics from Unicode characters so that only base letters remain.

    The text is NFD-normalised; every code point whose category is ``Mn`` or
    ``Cf`` is dropped and reported with an informational message on stdout.
    """
    kept = []
    for code_point in unicodedata.normalize('NFD', text):
        if unicodedata.category(code_point) in ('Mn', 'Cf'):
            print(f"Info: Diakritisches Zeichen '{code_point}' wird ignoriert.")
            continue
        kept.append(code_point)
    return ''.join(kept)
# Gematria value of every supported character. Built once at import time
# instead of on every letter_to_value() call.
_GEMATRIA_VALUES = {
    # Latin letters
    'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 600,
    'k': 10, 'l': 20, 'm': 30, 'n': 40, 'o': 50, 'p': 60, 'q': 70, 'r': 80, 's': 90,
    't': 100, 'u': 200, 'v': 700, 'w': 900, 'x': 300, 'y': 400, 'z': 500,
    'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7, 'H': 8, 'I': 9, 'J': 600,
    'K': 10, 'L': 20, 'M': 30, 'N': 40, 'O': 50, 'P': 60, 'Q': 70, 'R': 80, 'S': 90,
    'T': 100, 'U': 200, 'V': 700, 'W': 900, 'X': 300, 'Y': 400, 'Z': 500,
    # Arabic base letters and some common variants
    'ا': 1, 'أ': 1, 'إ': 1, 'آ': 1, 'ب': 2, 'ج': 3, 'د': 4, 'ه': 5, 'و': 6, 'ز': 7, 'ح': 8, 'ط': 9,
    'ي': 10, 'ى': 10, 'ك': 20, 'ک': 20, 'ل': 30, 'م': 40, 'ن': 50, 'س': 60, 'ع': 70, 'ف': 80,
    'ص': 90, 'ق': 100, 'ر': 200, 'ش': 300, 'ت': 400, 'ث': 500, 'خ': 600, 'ذ': 700, 'ض': 800, 'ظ': 900, 'غ': 1000,
    'ٱ': 1,  # Alif Wasla
    'ـ': 0,  # Tatweel
    # Additional variants and special characters
    'ة': 400,  # Taa Marbuta
    'ؤ': 6,  # Waw with Hamza above
    'ئ': 10,  # Ya with Hamza above
    'ء': 1,  # Hamza
    'ٹ': 400,  # Taa' marbuta goal
    'پ': 2,  # Pe (Persian/Urdu)
    'چ': 3,  # Che (Persian/Urdu)
    'ژ': 7,  # Zhe (Persian/Urdu)
    'گ': 20,  # Gaf (Persian/Urdu)
    'ڭ': 20,  # Ngaf (Kazakh, Uyghur, Uzbek, and in some Arabic dialects)
    'ں': 50,  # Noon Ghunna (Persian/Urdu)
    'ۀ': 5,  # Heh with Yeh above (Persian/Urdu)
    'ے': 10,  # Barree Yeh (Persian/Urdu)
    '؋': 0,  # Afghani Sign (a currency symbol; not relevant for gematria, listed for completeness)
    # Note: currency symbols and similar signs normally play no role in
    # gematria and are only listed for completeness. Many further characters
    # of the extended Arabic script (Persian, Urdu, Pashto, ...) are not covered.
    # Hebrew base and final forms
    'א': 1, 'ב': 2, 'ג': 3, 'ד': 4, 'ה': 5, 'ו': 6, 'ז': 7, 'ח': 8, 'ט': 9, 'י': 10,
    'כ': 20, 'ך': 500, 'ל': 30, 'מ': 40, 'ם': 600, 'נ': 50, 'ן': 700, 'ס': 60, 'ע': 70, 'פ': 80, 'ף': 800,
    'צ': 90, 'ץ': 900, 'ק': 100, 'ר': 200, 'ש': 300, 'ת': 400,
    # Greek lowercase letters
    'α': 1, 'β': 2, 'γ': 3, 'δ': 4, 'ε': 5, 'ϝ': 6, 'ζ': 7, 'η': 8, 'θ': 9, 'ι': 10,
    'κ': 20, 'λ': 30, 'μ': 40, 'ν': 50, 'ξ': 60, 'ο': 70, 'π': 80, 'ϟ': 90, 'ρ': 100,
    'σ': 200, 'τ': 300, 'υ': 400, 'φ': 500, 'χ': 600, 'ψ': 700, 'ω': 800, 'ϡ': 900,
    'ς': 200,  # Final Sigma
    # Greek uppercase letters
    'Α': 1, 'Β': 2, 'Γ': 3, 'Δ': 4, 'Ε': 5, 'Ϝ': 6, 'Ζ': 7, 'Η': 8, 'Θ': 9, 'Ι': 10,
    'Κ': 20, 'Λ': 30, 'Μ': 40, 'Ν': 50, 'Ξ': 60, 'Ο': 70, 'Π': 80, 'Ϟ': 90, 'Ρ': 100,
    'Σ': 200, 'Τ': 300, 'Υ': 400, 'Φ': 500, 'Χ': 600, 'Ψ': 700, 'Ω': 800, 'Ϡ': 900,
}


def letter_to_value(letter):
    """
    Convert a single letter to its gematria value.

    Lookup order:
      1. the letter exactly as given, so precomposed entries of the table
         are found before NFD stripping would decompose them;
      2. whitespace and the empty string count as 0, silently;
      3. the letter with diacritics removed via ``strip_diacritics``;
      4. anything else is reported on stdout and counts as 0.

    Upper- and lowercase entries carry identical values, so no case folding
    is applied.
    """
    value = _GEMATRIA_VALUES.get(letter)
    if value is not None:
        return value
    if letter.strip() == "":  # ignore spaces and empty lines
        return 0

    # Make sure diacritics are removed before giving up on the table.
    value = _GEMATRIA_VALUES.get(strip_diacritics(letter))
    if value is not None:
        return value

    # Emit a specific warning when the character is unknown.
    print(f"Warnung: Unbekanntes Zeichen '{letter}' ignoriert.")
    return 0
def calculate_gematria(text):
    """Return the sum of the gematria values of all non-whitespace characters in ``text``.

    Each character is valued by ``letter_to_value``, which yields 0 for
    characters it does not know.
    """
    total = 0
    for character in text:
        if character.strip() == "":
            continue
        total += letter_to_value(character)
    return total
| [File Ends] gematria.py | |
| [File Begins] requirements.txt | |
| fastapi==0.112.3 | |
| gradio==4.31.0 | |
| deep_translator==1.11.4 | |
| [File Ends] requirements.txt | |
| [File Begins] util.py | |
| import json | |
| import re | |
def process_json_files(start, end):
    """
    Load the JSON files ``texts/NN.json`` for a range of book ids.

    Args:
        start: The starting book ID (inclusive).
        end: The ending book ID (inclusive).

    Returns:
        A dictionary where keys are book IDs and values are dictionaries
        containing 'title' (default "No title") and 'text' (default ``[]``).
        Files that are missing, are not valid JSON, are empty, or do not hold
        a JSON object are skipped with a logged warning; they never raise.
    """
    # This module's header imports only json and re, so the warning calls
    # below used to fail with NameError. Import logging here to keep the
    # function self-contained.
    import logging

    base_path = "texts"
    results = {}  # Use a dictionary to store results
    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:02}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except FileNotFoundError:
            logging.warning(f"File {file_name} not found.")
            continue
        except json.JSONDecodeError as e:
            logging.warning(f"File {file_name} could not be read as JSON: {e}")
            continue

        if not data:
            continue
        if not isinstance(data, dict):
            # .get() below needs a mapping; a top-level list or scalar is not a book.
            logging.warning(f"File {file_name} does not contain a JSON object; skipping.")
            continue
        # Store book ID as key and book data as value
        results[i] = {"title": data.get("title", "No title"), "text": data.get("text", [])}
    return results
| [File Ends] util.py | |
| <-- File Content Ends | |