feat: add comparative FIRT-ORPO benchmark runner for AdamW vs FCES

This commit is contained in:
AI-anonymous
2026-05-20 04:19:57 +02:00
parent 074fbc5cb6
commit 1dea539019
2 changed files with 515 additions and 0 deletions

407
benchmark_fces_vs_adam.py Normal file
View File

@@ -0,0 +1,407 @@
import argparse
import os
import sys
import time
from typing import Any, Dict, List, Tuple
# Ensure python folder is in path
python_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "python")
sys.path.insert(0, python_dir)
import torch # noqa: E402
import fces_native # noqa: E402
import dspy # noqa: E402
import torch.nn.functional as F # noqa: E402
from send_telemetry import push_to_mariadb, push_to_surrealdb # noqa: E402
from transformers import AutoModelForCausalLM, AutoTokenizer # noqa: E402
# ==============================================================================
# 1. DSPY SIGNATURE & SYSTEM DESIGN
# ==============================================================================
class LegalFIRT(dspy.Signature): # type: ignore[misc]
"""Flaw Induced Reasoning Traces (FIRT) for German & EU Tenancy / M&A Law."""
case_description: str = dspy.InputField(
desc="Facts of the German/EU legal case with potential hidden flaws or contradictions."
)
relevant_statutes: str = dspy.InputField(
desc="Relevant articles from the German Civil Code (BGB) or EU Directives."
)
t0_draft: str = dspy.OutputField(
desc="Initial legal assessment or draft resolution based on first-pass reasoning."
)
t1_flaws: str = dspy.OutputField(
desc="Identified legal flaws, logical bottlenecks, or contradictions (e.g. § 536b BGB exclusions)."
)
t2_refined_opinion: str = dspy.OutputField(
desc="Final, correct, and authoritative legal opinion addressing all identified flaws."
)
# Legal Cases for Pre- and Post-Training Benchmarking
eval_cases: List[Dict[str, str]] = [
{
"description": "Mieter M besichtigt im Januar eine Wohnung des Vermieters V. Schimmelfleck im Schlafzimmer. V sagt er wird es 'irgendwann mal wegmachen'. M unterschreibt Mietvertrag vorbehaltlos und zieht am 1. Februar ein. Im März mindert M die Miete selbstständig um 20% wegen des Schimmels. V verlangt volle Miete.",
"statutes": "§ 535 Abs. 2 BGB (Mietzahlung), § 536 Abs. 1 BGB (Mietminderung bei Sachmangel), § 536b BGB (Kenntnis des Mieters bei Vertragsschluss).",
},
{
"description": "Toilettenspülung bei F fällt am Freitagabend aus, Wasser läuft ununterbrochen und droht überzulaufen. V ist auf Segeltrip und nicht erreichbar. Kein Hausmeister. F beauftragt am Samstagmorgen den Notdienst K, bezahlt 300 € und fordert Erstattung von V. V weigert sich.",
"statutes": "§ 536a Abs. 2 Nr. 2 BGB (Selbstbeseitigungsrecht bei Gefahr im Verzug), § 535 BGB.",
},
]
# ==============================================================================
# 2. ORPO LOSS WITH SOFT LABELS
# ==============================================================================
def compute_orpo_loss(
logits: torch.Tensor,
labels: torch.Tensor,
logits_lose: torch.Tensor,
labels_lose: torch.Tensor,
lambda_orpo: float = 0.1,
) -> Tuple[torch.Tensor, torch.Tensor]:
"""Computes the combined Cross-Entropy (SFT) and Odds Ratio Preference (ORPO) Loss
using soft probability log-likelihoods.
"""
# 1. SFT Loss (Cross Entropy) on the preferred target sequence
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous()
sft_loss = F.cross_entropy(
shift_logits.view(-1, shift_logits.size(-1)),
shift_labels.view(-1),
ignore_index=-100,
)
# 2. Average log probabilities for preferred (win) and dispreferred (lose) sequences
# Logits win log_softmax
log_probs_win = F.log_softmax(logits, dim=-1)
# Gather actual label log_probs
loss_mask_win = labels != -100
labels_clamped = labels.clone()
labels_clamped[~loss_mask_win] = 0
seq_log_probs_win = torch.gather(
log_probs_win, dim=-1, index=labels_clamped.unsqueeze(-1)
).squeeze(-1)
seq_log_probs_win = (seq_log_probs_win * loss_mask_win).sum(
dim=-1
) / loss_mask_win.sum(dim=-1).clamp(min=1)
# Logits lose log_softmax
log_probs_lose = F.log_softmax(logits_lose, dim=-1)
loss_mask_lose = labels_lose != -100
labels_lose_clamped = labels_lose.clone()
labels_lose_clamped[~loss_mask_lose] = 0
seq_log_probs_lose = torch.gather(
log_probs_lose, dim=-1, index=labels_lose_clamped.unsqueeze(-1)
).squeeze(-1)
seq_log_probs_lose = (seq_log_probs_lose * loss_mask_lose).sum(
dim=-1
) / loss_mask_lose.sum(dim=-1).clamp(min=1)
# 3. Odds Ratio Calculation
log_odds_win = seq_log_probs_win - torch.log1p(-torch.exp(seq_log_probs_win))
log_odds_lose = seq_log_probs_lose - torch.log1p(-torch.exp(seq_log_probs_lose))
odds_ratio = log_odds_win - log_odds_lose
orpo_loss = -F.logsigmoid(odds_ratio).mean()
# Total combined loss
total_loss = sft_loss + lambda_orpo * orpo_loss
return total_loss, sft_loss
# ==============================================================================
# 3. EVALUATION RUNNER
# ==============================================================================
def evaluate_model(
model: AutoModelForCausalLM, tokenizer: AutoTokenizer, device: str
) -> List[Dict[str, str]]:
"""Evaluates the model on legal exam cases (zero-shot) to measure T2 opinion quality."""
model.eval()
results = []
for idx, case in enumerate(eval_cases):
prompt = (
f"Case: {case['description']}\nStatutes: {case['statutes']}\n"
"Analyze and resolve using Flaw Induced Reasoning Traces (FIRT):\n"
"T0 Draft:\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(device)
with torch.no_grad():
outputs = model.generate(
**inputs, max_new_tokens=256, temperature=0.1, do_sample=False
)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
results.append({"case_id": idx + 1, "output": generated_text})
return results
# ==============================================================================
# 4. TRAINING PIPELINE (ADAMW VS FCES)
# ==============================================================================
def train_run(
optimizer_name: str,
model_name: str,
steps: int,
device: str,
dataset: List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]],
) -> Tuple[List[Dict[str, Any]], List[Dict[str, str]], List[Dict[str, str]]]:
"""Runs the training loop and performs pre/post evaluations."""
print(f"\n[START] Initializing training run with {optimizer_name}...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
# 1. Pre-Training Evaluation
print(f"[{optimizer_name}] Running Pre-Training Evaluation...")
pre_eval = evaluate_model(model, tokenizer, device)
# 2. Setup Optimizer
model.train()
if optimizer_name == "AdamW":
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
elif optimizer_name == "FCES":
cfg = fces_native.FCESConfig()
cfg.lr = 1e-4
cfg.population_size = 8 # Balanced for resource footprint
cfg.total_steps = steps
optimizer = fces_native.FCESOptimizer(list(model.parameters()), cfg)
else:
raise ValueError("Invalid optimizer name")
# 3. Training loop
logs = []
start_time = time.perf_counter()
tokens_processed = 0
for step in range(steps):
# Cyclically fetch batches from pre-tokenized dataset
batch = dataset[step % len(dataset)]
input_win, labels_win, input_lose, labels_lose = [t.to(device) for t in batch]
optimizer.zero_grad()
# Forward pass for win (preferred)
outputs_win = model(input_win, labels=labels_win)
logits_win = outputs_win.logits
# Forward pass for lose (dispreferred)
with torch.no_grad():
outputs_lose = model(input_lose)
logits_lose = outputs_lose.logits
# Compute combined ORPO loss
loss, sft_loss = compute_orpo_loss(
logits_win, labels_win, logits_lose, labels_lose
)
loss.backward()
optimizer.step()
if optimizer_name == "FCES":
optimizer.update_fitness(float(loss.item()))
# Tracking metrics
elapsed = time.perf_counter() - start_time
batch_tokens = int((input_win != tokenizer.pad_token_id).sum().item())
tokens_processed += batch_tokens
logs.append(
{
"step": step,
"loss": float(loss.item()),
"sft_loss": float(sft_loss.item()),
"wall_clock_time": elapsed,
"tokens_processed": tokens_processed,
}
)
if step % 5 == 0:
print(
f"[{optimizer_name}] Step {step}/{steps} | Loss: {loss.item():.4f} | "
f"Time: {elapsed:.2f}s | Tokens: {tokens_processed}"
)
# 4. Post-Training Evaluation
print(f"[{optimizer_name}] Running Post-Training Evaluation...")
post_eval = evaluate_model(model, tokenizer, device)
# Cleanup memory
del model
if device == "cuda":
torch.cuda.empty_cache()
return logs, post_eval, pre_eval
# ==============================================================================
# 5. DATA PREPARATION (MOCK RACF DATASET FOR PRE-FLIGHT TESTS)
# ==============================================================================
def build_mock_dataset(
model_name: str, size: int = 20
) -> List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]]:
"""Synthesizes mock pre-tokenized target sequences representing
FIRT trajectories (Top-k vs Bottom-l preferences) for local dry-runs.
"""
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
dataset = []
for idx in range(size):
# Winning path (Correct FIRT opinions)
text_win = (
f"Case {idx}: Tenant wins due to Self-beseitigungsrecht § 536a BGB. "
"Unresolved flaws: None. Authority: Refined opinion correct."
)
# Losing path (Flawed/contradictory reasoning)
text_lose = (
f"Case {idx}: Tenant loses but reasoning is faulty. "
"Unresolved flaws: Ignored positive knowledge under § 536b BGB."
)
enc_win = tokenizer(
text_win,
truncation=True,
max_length=64,
padding="max_length",
return_tensors="pt",
)
enc_lose = tokenizer(
text_lose,
truncation=True,
max_length=64,
padding="max_length",
return_tensors="pt",
)
labels_win = enc_win["input_ids"].clone()
labels_win[enc_win["attention_mask"] == 0] = -100
labels_lose = enc_lose["input_ids"].clone()
labels_lose[enc_lose["attention_mask"] == 0] = -100
dataset.append(
(
enc_win["input_ids"],
labels_win,
enc_lose["input_ids"],
labels_lose,
)
)
return dataset
# ==============================================================================
# 6. MAIN CONTROLLER
# ==============================================================================
def main() -> None:
parser = argparse.ArgumentParser(description="FIRT ORPO FCES vs AdamW Trainer")
parser.add_argument(
"--model",
type=str,
default="EleutherAI/pythia-70m",
help="Model name (e.g. google/gemma-2-2b-it)",
)
parser.add_argument(
"--steps", type=int, default=10, help="Number of training steps"
)
args = parser.parse_args()
device = "cuda" if torch.cuda.is_available() else "cpu"
print("=" * 80)
print(
" FCES VS ADAMW COMPARATIVE TRAINING BENCHMARK "
)
print("=" * 80)
print(f"Device: {device} | Base Model: {args.model} | Steps: {args.steps}")
# Initialize pre-tokenized dataset
print("\n[INFO] Synthesizing target training tokens...")
dataset = build_mock_dataset(args.model)
# 1. Run AdamW Baseline
adam_logs, adam_post, pre_eval = train_run(
"AdamW", args.model, args.steps, device, dataset
)
# 2. Run FCES Optimizer
fces_logs, fces_post, _ = train_run("FCES", args.model, args.steps, device, dataset)
# 3. Telemetry Log Synthesis & Push
telemetry_entries = []
print("\n" + "-" * 30 + " FINAL RUN SUMMARY " + "-" * 30)
print(f"{'Step':<10}{'AdamW Loss':<20}{'FCES Loss':<20}{'Speedup / Savings':<25}")
print("-" * 75)
for idx in range(min(len(adam_logs), len(fces_logs))):
step_val = adam_logs[idx]["step"]
adam_loss = adam_logs[idx]["loss"]
fces_loss = fces_logs[idx]["loss"]
time_saved_pct = (
(adam_logs[idx]["wall_clock_time"] - fces_logs[idx]["wall_clock_time"])
/ adam_logs[idx]["wall_clock_time"]
* 100
)
if idx % (max(1, args.steps // 5)) == 0 or idx == args.steps - 1:
print(
f"{step_val:<10}{adam_loss:<20.4f}{fces_loss:<20.4f}"
f"{time_saved_pct:+.2f}% wall-clock time"
)
telemetry_entries.append(
(
"INFO",
"comparative_step",
f"Step {step_val} | AdamW Loss: {adam_loss:.4f} | FCES Loss: {fces_loss:.4f}",
)
)
print("-" * 75)
print("\n>>> PRE-TRAINING OPINION (CASE 1):")
print(pre_eval[0]["output"])
print("\n>>> POST-TRAINING ADAMW OPINION (CASE 1):")
print(adam_post[0]["output"])
print("\n>>> POST-TRAINING FCES OPINION (CASE 1):")
print(fces_post[0]["output"])
print("=" * 80)
# Push telemetries
push_to_surrealdb(telemetry_entries)
push_to_mariadb(telemetry_entries)
# Write comparative summary artifact
import json
results_summary = {
"model": args.model,
"steps": args.steps,
"adam_metrics": adam_logs,
"fces_metrics": fces_logs,
"pre_eval": pre_eval,
"adam_post_eval": adam_post,
"fces_post_eval": fces_post,
}
with open("benchmark_results.json", "w", encoding="utf-8") as f:
json.dump(results_summary, f, indent=4)
print("[INFO] Stored comprehensive benchmark metrics to benchmark_results.json")
if __name__ == "__main__":
main()

108
benchmark_results.json Normal file
View File

@@ -0,0 +1,108 @@
{
"model": "EleutherAI/pythia-70m",
"steps": 5,
"adam_metrics": [
{
"step": 0,
"loss": 7.041037559509277,
"sft_loss": 6.995092391967773,
"wall_clock_time": 0.8969566999949166,
"tokens_processed": 35
},
{
"step": 1,
"loss": 2.5148799419403076,
"sft_loss": 2.4989213943481445,
"wall_clock_time": 1.8027480999953696,
"tokens_processed": 70
},
{
"step": 2,
"loss": 0.8542575836181641,
"sft_loss": 0.8379661440849304,
"wall_clock_time": 2.7821450999981607,
"tokens_processed": 105
},
{
"step": 3,
"loss": 0.6839541792869568,
"sft_loss": 0.667436420917511,
"wall_clock_time": 3.560283499995421,
"tokens_processed": 140
},
{
"step": 4,
"loss": 0.359605997800827,
"sft_loss": 0.3395426273345947,
"wall_clock_time": 4.330241899995599,
"tokens_processed": 175
}
],
"fces_metrics": [
{
"step": 0,
"loss": 7.041037559509277,
"sft_loss": 6.995092391967773,
"wall_clock_time": 13.06952640000236,
"tokens_processed": 35
},
{
"step": 1,
"loss": 6.717805862426758,
"sft_loss": 6.71108341217041,
"wall_clock_time": 20.157852299998922,
"tokens_processed": 70
},
{
"step": 2,
"loss": 14.43155574798584,
"sft_loss": 14.42345142364502,
"wall_clock_time": 27.273785699995642,
"tokens_processed": 105
},
{
"step": 3,
"loss": 7.063178062438965,
"sft_loss": 7.0147223472595215,
"wall_clock_time": 34.50635989999864,
"tokens_processed": 140
},
{
"step": 4,
"loss": 5.816511631011963,
"sft_loss": 5.8062052726745605,
"wall_clock_time": 42.63929800000187,
"tokens_processed": 175
}
],
"pre_eval": [
{
"case_id": 1,
"output": "Case: Mieter M besichtigt im Januar eine Wohnung des Vermieters V. Schimmelfleck im Schlafzimmer. V sagt er wird es 'irgendwann mal wegmachen'. M unterschreibt Mietvertrag vorbehaltlos und zieht am 1. Februar ein. Im M\u00e4rz mindert M die Miete selbstst\u00e4ndig um 20% wegen des Schimmels. V verlangt volle Miete.\nStatutes: \u00a7 535 Abs. 2 BGB (Mietzahlung), \u00a7 536 Abs. 1 BGB (Mietminderung bei Sachmangel), \u00a7 536b BGB (Kenntnis des Mieters bei Vertragsschluss).\nAnalyze and resolve using Flaw Induced Reasoning Traces (FIRT):\nT0 Draft:\nThe following is a summary of the relevant facts and the relevant facts.\n1. The following is a summary of the relevant facts and the relevant facts.\n2. The following is a summary of the relevant facts and the relevant facts.\n3. The following is a summary of the relevant facts and the relevant facts.\n4. The following is a summary of the relevant facts and the relevant facts.\n5. The following is a summary of the relevant facts and the relevant facts.\n6. The following is a summary of the relevant facts and the relevant facts.\n7. The following is a summary of the relevant facts and the relevant facts.\n8. The following is a summary of the relevant facts and the relevant facts.\n9. The following is a summary of the relevant facts and the relevant facts.\n10. The following is a summary of the relevant facts and the relevant facts.\n11. The following is a summary of the relevant facts and the relevant facts.\n12. The following is a summary of the relevant facts and the relevant facts.\n13. The following is a summary of the relevant facts and the relevant facts.\n14. The following is a summary of the relevant facts and the relevant facts.\n15. The"
},
{
"case_id": 2,
"output": "Case: Toilettensp\u00fclung bei F f\u00e4llt am Freitagabend aus, Wasser l\u00e4uft ununterbrochen und droht \u00fcberzulaufen. V ist auf Segeltrip und nicht erreichbar. Kein Hausmeister. F beauftragt am Samstagmorgen den Notdienst K, bezahlt 300 \u20ac und fordert Erstattung von V. V weigert sich.\nStatutes: \u00a7 536a Abs. 2 Nr. 2 BGB (Selbstbeseitigungsrecht bei Gefahr im Verzug), \u00a7 535 BGB.\nAnalyze and resolve using Flaw Induced Reasoning Traces (FIRT):\nT0 Draft:\n\"The FIRT is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It is a method of analysis of the nature of the FIRT. It"
}
],
"adam_post_eval": [
{
"case_id": 1,
"output": "Case: Mieter M besichtigt im Januar eine Wohnung des Vermieters V. Schimmelfleck im Schlafzimmer. V sagt er wird es 'irgendwann mal wegmachen'. M unterschreibt Mietvertrag vorbehaltlos und zieht am 1. Februar ein. Im M\u00e4rz mindert M die Miete selbstst\u00e4ndig um 20% wegen des Schimmels. V verlangt volle Miete.\nStatutes: \u00a7 535 Abs. 2 BGB (Mietzahlung), \u00a7 536 Abs. 1 BGB (Mietminderung bei Sachmangel), \u00a7 536b BGB (Kenntnis des Mieters bei Vertragsschluss).\nAnalyze and resolve using Flaw Induced Reasoning Traces (FIRT):\nT0 Draft:\n \u00a7 536a BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB. \u00a7 536 BGB."
},
{
"case_id": 2,
"output": "Case: Toilettensp\u00fclung bei F f\u00e4llt am Freitagabend aus, Wasser l\u00e4uft ununterbrochen und droht \u00fcberzulaufen. V ist auf Segeltrip und nicht erreichbar. Kein Hausmeister. F beauftragt am Samstagmorgen den Notdienst K, bezahlt 300 \u20ac und fordert Erstattung von V. V weigert sich.\nStatutes: \u00a7 536a Abs. 2 Nr. 2 BGB (Selbstbeseitigungsrecht bei Gefahr im Verzug), \u00a7 535 BGB.\nAnalyze and resolve using Flaw Induced Reasoning Traces (FIRT):\nT0 Draft:\n \u00a7 536a BGB. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined opinion correct. Refined correct. Refined opinion correct. Refined correct. Refined opinion correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct. Refined correct."
}
],
"fces_post_eval": [
{
"case_id": 1,
"output": "Case: Mieter M besichtigt im Januar eine Wohnung des Vermieters V. Schimmelfleck im Schlafzimmer. V sagt er wird es 'irgendwann mal wegmachen'. M unterschreibt Mietvertrag vorbehaltlos und zieht am 1. Februar ein. Im M\u00e4rz mindert M die Miete selbstst\u00e4ndig um 20% wegen des Schimmels. V verlangt volle Miete.\nStatutes: \u00a7 535 Abs. 2 BGB (Mietzahlung), \u00a7 536 Abs. 1 BGB (Mietminderung bei Sachmangel), \u00a7 536b BGB (Kenntnis des Mieters bei Vertragsschluss).\nAnalyze and resolve using Flaw Induced Reasoning Traces (FIRT):\nT0 Draft:\n................................................................................................................................................................................. \u00a7. \u00a7. \u00a7........................................................ \u00a7. \u00a7. \u00a7............."
},
{
"case_id": 2,
"output": "Case: Toilettensp\u00fclung bei F f\u00e4llt am Freitagabend aus, Wasser l\u00e4uft ununterbrochen und droht \u00fcberzulaufen. V ist auf Segeltrip und nicht erreichbar. Kein Hausmeister. F beauftragt am Samstagmorgen den Notdienst K, bezahlt 300 \u20ac und fordert Erstattung von V. V weigert sich.\nStatutes: \u00a7 536a Abs. 2 Nr. 2 BGB (Selbstbeseitigungsrecht bei Gefahr im Verzug), \u00a7 535 BGB.\nAnalyze and resolve using Flaw Induced Reasoning Traces (FIRT):\nT0 Draft:\n.......................................................................................................................................... \u00a7.................... \u00a7.......... \u00a7......... \u00a7....... \u00a7........ \u00a7........ \u00a7......... \u00a7.......... \u00a7......... \u00a7......... \u00a7........"
}
]
}