import json
import logging
import argparse
import numpy as np
import sys
import os
import re
from collections import Counter
import pickle
from gematria import letter_to_value, HEBREW_GEMATRIA_VALUES, linearize_umlauts, decompose_to_latin
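
# NOTE: `gematria` is a local helper module (not a PyPI package); judging by the
# imported names it provides per-letter gematria values, umlaut linearization,
# and a fallback decomposition of a number into Latin letters.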
# --- Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.getLogger("gensim").setLevel(logging.WARNING)

HOLOGRAPHIC_STATE_SIZE_BITS = 4096
BITS_PER_CHAR = 16
BOOK_RANGE = range(1, 40)
MODELS_DIR = "models_by_book"
INDICES_DIR = "indices_by_book"
CACHE_FILE = "tanakh_data.cache"
SORTED_GEMATRIA = sorted(HEBREW_GEMATRIA_VALUES.items(), key=lambda item: item[1], reverse=True)

def setup_logging(debug_mode):
    level = logging.DEBUG if debug_mode else logging.INFO
    logging.getLogger().setLevel(level)
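
# The holographic state is a 4096-bit string. At 16 bits per character slot it
# decodes into 4096 / 16 = 256 gematria values per read-out pass; longer texts
# are XOR-folded into the state block by block (see _fold_into_state below).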
# --- Core engine as a class ---
class TanakhExplorer:
    def __init__(self, use_cache=True):
        self.all_indices = {}
        self.tanakh_state = None
        cache_valid = use_cache and os.path.exists(CACHE_FILE)
        if cache_valid:
            try:
                logging.info(f"Loading data from cache file: {CACHE_FILE}")
                with open(CACHE_FILE, 'rb') as f:
                    cached_data = pickle.load(f)
                self.all_indices = cached_data.get('indices', {})
                self.tanakh_state = cached_data.get('state')
                logging.info("Data loaded successfully from cache.")
            except Exception as e:
                logging.warning(f"Cache file is corrupt or could not be loaded: {e}. Reloading data.")
                cache_valid = False
        if not cache_valid or not self.all_indices or not self.tanakh_state:
            self._load_all_indices()
            self._create_tanakh_holographic_state()
            if use_cache:
                self._save_to_cache()
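
    # Each per-book index (built by build_indices.py) maps a gematria value to
    # an object with a 'phrases' list and a 'pagerank' score, as consumed in
    # get_best_phrase_from_all_books below.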
    def _load_all_indices(self):
        logging.info("Loading index files for all books...")
        for i in BOOK_RANGE:
            index_path = os.path.join(INDICES_DIR, f"book_{i:02}_index.json")
            if os.path.exists(index_path):
                with open(index_path, 'r', encoding='utf-8') as f:
                    self.all_indices[i] = json.load(f)
        if not self.all_indices:
            sys.exit("No index files found. Please run 'build_indices.py'.")
        logging.info(f"Loaded {len(self.all_indices)} book indices.")
    def _create_tanakh_holographic_state(self):
        logging.info("Building the holographic Tanakh state...")
        full_binary_text = ""
        for i in BOOK_RANGE:
            try:
                with open(f"texts/torah/{i:02}.json", 'r', encoding='utf-8') as file:
                    data = json.load(file)
                full_text = ' '.join([' '.join(block) for block in data.get("text", [])])
                clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", re.sub(r"\[.*?\]", "", full_text, flags=re.DOTALL))
                if clean_text:
                    full_binary_text += self._text_to_gematria_binary(clean_text, for_state=True)
            except Exception:
                continue
        self.tanakh_state = self._fold_into_state(full_binary_text)
        logging.info("Holographic Tanakh state created.")
    def _save_to_cache(self):
        logging.info(f"Saving data to cache file: {CACHE_FILE}")
        data_to_cache = {'indices': self.all_indices, 'state': self.tanakh_state}
        with open(CACHE_FILE, 'wb') as f:
            pickle.dump(data_to_cache, f)
    def _text_to_gematria_binary(self, text, for_state=False):
        text_for_calc = linearize_umlauts(text.lower())
        if for_state:
            clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", text_for_calc)
        else:
            clean_text = re.sub(r"[^a-z\u05D0-\u05EA]+", "", text_for_calc)
        logging.debug(f"text_to_gematria_binary (for_state={for_state}): original='{text[:30]}...', cleaned='{clean_text[:30]}...'")
        binary_string = "".join(format(letter_to_value(c), f'0{BITS_PER_CHAR}b') for c in clean_text)
        logging.debug(f" -> generated binary string (first 64 bits): {binary_string[:64]}")
        return binary_string
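
    # XOR-folds an arbitrarily long bitstring into a fixed-size state.
    # Worked example with a 4-bit state: folding "110101" processes the blocks
    # "1101" and "01" (zero-padded to "0100"); 1101 XOR 0100 = 1001.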
    def _fold_into_state(self, binary_string, initial_state=None):
        state = np.array(list(initial_state), dtype=np.int8) if initial_state else np.zeros(HOLOGRAPHIC_STATE_SIZE_BITS, dtype=np.int8)
        for i in range(0, len(binary_string), HOLOGRAPHIC_STATE_SIZE_BITS):
            block = binary_string[i:i+HOLOGRAPHIC_STATE_SIZE_BITS].ljust(HOLOGRAPHIC_STATE_SIZE_BITS, '0')
            state = np.bitwise_xor(state, np.array(list(block), dtype=np.int8))
        return "".join(state.astype(str))
    def get_best_phrase_from_all_books(self, gematria_val, method):
        best_overall_phrase_obj = None
        best_overall_score = -1.0
        for book_num, book_index in self.all_indices.items():
            candidates = book_index.get(str(gematria_val), {}).get('phrases', [])
            if not candidates:
                continue
            pg_score = book_index.get(str(gematria_val), {}).get('pagerank', 0)
            best_in_book = max(candidates, key=lambda p: pg_score / p.get('count', 1) if p.get('count', 0) > 0 else 0)
            current_score = pg_score / best_in_book.get('count', 1) if best_in_book.get('count', 0) > 0 else 0
            if current_score > best_overall_score:
                best_overall_score = current_score
                best_in_book['source_book'] = book_num
                best_overall_phrase_obj = best_in_book
        if best_overall_phrase_obj:
            return best_overall_phrase_obj, "exact"
        for offset in [1, -1]:
            for book_num, book_index in self.all_indices.items():
                candidates = book_index.get(str(gematria_val + offset), {}).get('phrases', [])
                if candidates:
                    best_in_book = min(candidates, key=lambda p: p.get('position', float('inf')))
                    best_in_book['source_book'] = book_num
                    return best_in_book, f"neighbor(d={offset})"
        decomposed = decompose_to_latin(gematria_val)
        if decomposed:
            return {"text": f"[{decomposed}]", "position": -2, "source_book": "N/A"}, "decomposed"
        return None, None
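
    # Fractal mode: at each depth, combines the seed query with every phrase
    # found at the previous depth, folds the combination into a state, XORs it
    # against the global Tanakh state, and decodes the result as 256 gematria
    # values that are resolved back to phrases.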
    def run_fractal_mode(self, query, depth, method):
        print("\n" + "=" * 15 + f" FRACTAL LOGOS EXPANSION (depth: {depth}, method: {method}) " + "=" * 15)
        initial_logos = query
        # Use source_book 0 for the seed query so that sorting by book below
        # never compares an int against a string (avoids a TypeError).
        all_found_phrases_map = {initial_logos: {"text": initial_logos, "position": -1, "depth": 0, "count": 1, "source_book": 0}}
        phrases_to_process_this_level = {initial_logos}
        for d in range(depth):
            logging.info(f"--- Starting depth {d + 1}/{depth} with {len(phrases_to_process_this_level)} phrases ---")
            phrases_for_next_level = set()
            for p_current in phrases_to_process_this_level:
                combined_query = f"{initial_logos} {p_current}"
                query_binary = self._text_to_gematria_binary(combined_query)
                konzept_state = self._fold_into_state(query_binary)
                final_konzept = "".join(str(int(a) ^ int(b)) for a, b in zip(self.tanakh_state, konzept_state))
                for i in range(0, len(final_konzept), BITS_PER_CHAR):
                    gematria_val = int(final_konzept[i:i+BITS_PER_CHAR], 2)
                    if gematria_val == 0:
                        continue
                    phrase_obj, _ = self.get_best_phrase_from_all_books(gematria_val, method)
                    if phrase_obj:
                        phrase_text = phrase_obj['text']
                        if phrase_text not in all_found_phrases_map:
                            phrase_obj['depth'] = d + 1
                            phrase_obj['count'] = 1
                            all_found_phrases_map[phrase_text] = phrase_obj
                            phrases_for_next_level.add(phrase_text)
                        else:
                            all_found_phrases_map[phrase_text]['count'] += 1
            if not phrases_for_next_level:
                logging.info(f"No new phrases found at depth {d + 1}.")
                break
            phrases_to_process_this_level = phrases_for_next_level
        # Sort by book, then by position, to preserve the narrative order;
        # non-integer sources (e.g. "N/A") are pushed to the end to avoid a TypeError.
        sorted_by_position = sorted(all_found_phrases_map.values(), key=lambda x: (x.get('source_book') if isinstance(x.get('source_book'), int) else 99, x.get('position', -1)))
        print("\n--- Final synthesis (ordered by book and occurrence in the text) ---")
        current_book = -1
        for p in sorted_by_position:
            book = p.get('source_book')
            if book != current_book:
                # Print a header for each new book
                if isinstance(book, int) and book > 0:
                    print(f"\n--- Book {book:02d} ---")
                elif book == 0:
                    print("--- Query ---")
                current_book = book
            print(f"{p['text']}", end=" | ")
        print("\n")
        # Sort by frequency for the top concepts
        sorted_by_count = sorted(all_found_phrases_map.values(), key=lambda x: x['count'], reverse=True)
        print("\n--- Top 25 resonance concepts (ordered by frequency within the fractal) ---")
        for p in sorted_by_count[:25]:
            source = f"B{p.get('source_book', '??'):02d}" if isinstance(p.get('source_book'), int) and p.get('source_book') > 0 else p.get('source_book', 'N/A')
            print(f"[{p['count']:2d}x] {p['text']} (original in {source}, pos: {p.get('position', 'N/A')})")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Tanakh Holographic Explorer (v13, final).")
    parser.add_argument("query", type=str, help="The initial query phrase (logos).")
    parser.add_argument("--method", type=str, choices=['frequency', 'network', 'default'], default='network', help="Weighting method.")
    parser.add_argument("--depth", type=int, default=1, help="Maximum depth of the fractal search.")
    parser.add_argument("--no-cache", action="store_true", help="Forces a fresh reload of the data.")
    parser.add_argument("--debug", action="store_true", help="Enables detailed debug output.")
    args = parser.parse_args()
    setup_logging(args.debug)
    engine = TanakhExplorer(use_cache=not args.no_cache)
    engine.run_fractal_mode(args.query, args.depth, args.method)