#!/usr/bin/env python3
"""
Restructure explanation text in SPL exam question files to use markdown
bullets for option analysis sentences.

For each "#### Explanation" (or language equivalent) section, sentences that
analyse answer options ("Option A is wrong because ...") are moved into a
markdown bullet list with the option reference bolded, while the general
explanation remains as a leading paragraph.
"""
import re
import os  # NOTE: unused here, retained to avoid breaking external expectations
from pathlib import Path

# Root folder holding the per-language question directories.
BASE_DIR = Path("/Users/i052341/Daten/Cloud/04 - Ablage/Ablage 2020 - 2029/Ablage 2025/Hobbies 2025/Segelflug/Theorie/Glidr")

SUBJECT_DIRS = {
    "EN": BASE_DIR / "SPL Exam Questions EN",
    "DE": BASE_DIR / "SPL Exam Questions DE",
    "FR": BASE_DIR / "SPL Exam Questions FR",
}

EXPLANATION_HEADERS = {
    "EN": "#### Explanation",
    "DE": "#### Erklärung",
    "FR": "#### Explication",
}

KEY_TERMS_HEADERS = {
    "EN": "#### Key Terms",
    "DE": "#### Begriffe",
    "FR": "#### Termes clés",
}

# Patterns that identify option-analysis sentences.
# Each pattern must match the START of a sentence (after splitting on sentence
# boundaries).  Strategy: a sentence is an option sentence if it STARTS with an
# option reference.  This catches all forms: "Option A is...",
# "Option A (label) contains...", "Options A and B are...", etc.
OPTION_PATTERNS = [
    # EN/DE: sentence starts with "Option" or "Options" followed by one or more letters A-D
    r"^Options?\s+[A-D]",
    # EN/DE: "Die Option A" or "Nur Option A"
    r"^(?:Die|Nur|Only)\s+Options?\s+[A-D]",
    # EN bare letter: "A is wrong", "B is incorrect" (only when followed by a form of "to be")
    r"^[A-D]\s+(?:is|are|was|were|would|can|cannot|does|did|has|have)\b",
    # DE bare letter: "A ist falsch"
    r"^[A-D]\s+(?:ist|sind|war|w\u00e4re)\b",
    # FR: "L'option A" or "L’option A"
    r"^L['\u2019]?options?\s+[A-D]",
    # FR bare letter: "A est incorrecte"
    r"^[A-D]\s+est\b",
    # Generic: "Option A:" or "Option A —" or "Option A–"
    r"^Options?\s+[A-D](?:\s*(?:,|and|und|et)\s*(?:and\s+|und\s+|et\s+)?[A-D])*\s*(?::|—|–)",
    # "Seule l'option C" (FR), "Nur Option C" (DE), "Only Option C" (EN) - correct answer callouts
    r"^(?:Seule\s+l['\u2019]?option|Nur\s+Option|Only\s+Option)\s+[A-D]",
]

# Patterns compiled once at import time (they were previously recompiled per
# sentence).  Matching is case-INSENSITIVE, mirroring the original behaviour.
_OPTION_RES = [re.compile(p, re.IGNORECASE) for p in OPTION_PATTERNS]

# Sentence splitter: break after a period — optionally followed by a closing
# quote — when the next non-space character looks like a sentence start.
# The closing quote is kept with the preceding sentence via two fixed-width
# lookbehind alternatives (a variable-width lookbehind is not allowed in `re`).
_SENTENCE_SPLIT_RE = re.compile(
    r'(?:(?<=\.)|(?<=\.["\'\u201d\u2019»]))\s+(?=[A-Z\xdc\xc4\xd6L\'"«\u201c\u2018])'
)

# (pattern, replacement) rules for bolding the leading option reference.
# Order matters: multi-option forms must precede their single-option forms so
# that "Options A and B" is bolded as a whole.
_BOLD_RULES = [
    (re.compile(pat, re.IGNORECASE), repl)
    for pat, repl in [
        # EN/DE multi: "Options A, B, and C" / "Options A und B" (must come before single)
        (r'^(Options?\s+[A-D](?:\s*(?:,|and|und)\s*(?:and\s+|und\s+)?[A-D])+)', r'**\1**'),
        # EN/DE single: "Option A"
        (r'^(Options?\s+[A-D])\b', r'**\1**'),
        # FR multi: "L'option A et B"
        (r"^(L['\u2019]?options?\s+[A-D](?:\s*(?:,|et)\s*(?:et\s+)?[A-D])+)", r'**\1**'),
        # FR single: "L'option A"
        (r"^(L['\u2019]?options?\s+[A-D])\b", r'**\1**'),
        # EN "Only Option C" -> "Only **Option C**"
        (r'^(Only\s+)(Options?\s+[A-D])\b', r'\1**\2**'),
        # DE "Nur Option C"
        (r'^(Nur\s+)(Options?\s+[A-D])\b', r'\1**\2**'),
        # FR "Seule l'option C"
        (r"^(Seule\s+)(l['\u2019]?options?\s+[A-D])\b", r'\1**\2**'),
        # DE "Die Option A"
        (r'^(Die\s+)(Options?\s+[A-D])\b', r'\1**\2**'),
        # Bare letter: "A is", "B est"
        (r'^([A-D])(\s+(?:is|are|ist|sind|est|sont|was|w\u00e4re|serait)\b)', r'**\1**\2'),
    ]
]

# Punctuation that already terminates a bullet sentence; no period is added.
_TERMINAL_PUNCT = (".", "!", "?", "…")


def split_into_sentences(text):
    """
    Split text into sentences.

    Splits on a period, optionally followed by a closing quote, followed by
    whitespace and a capital (or opening-quote) character:
      - ". Capital"
      - '." Capital' (period inside quotes — the quote is preserved)
      - ".'" / ".»" etc.
    Question and exclamation marks are NOT treated as boundaries (rare in
    this corpus, and they then simply end the final sentence).
    """
    return _SENTENCE_SPLIT_RE.split(text.strip())


def is_option_sentence(sentence, lang):
    """
    Return True if the sentence is an option-analysis sentence.

    `lang` is accepted for interface symmetry but currently unused: the
    combined pattern list covers EN/DE/FR simultaneously.
    """
    s = sentence.strip()
    return any(rx.match(s) for rx in _OPTION_RES)


def bold_option_reference(sentence):
    """
    Bold the initial option reference in a sentence.

    e.g. "Option A is wrong..."          -> "**Option A** is wrong..."
         "Option A (label) contains..."  -> "**Option A** (label) contains..."
         "Options A and B are..."        -> "**Options A and B** are..."
         "L'option A est..."             -> "**L'option A** est..."
         "Only Option C correctly..."    -> "Only **Option C** correctly..."
         "A is wrong"                    -> "**A** is wrong"

    The first rule that changes the sentence wins; if no rule applies the
    sentence is returned unchanged.
    """
    s = sentence.strip()
    for rx, repl in _BOLD_RULES:
        new_s = rx.sub(repl, s, count=1)
        if new_s != s:
            return new_s
    return s


def restructure_explanation(explanation_text, lang):
    """
    Given the raw explanation text (without the header line), restructure it:
      - sentences before the first option-analysis sentence -> main paragraph
      - the first option-analysis sentence and everything after it -> bullets
        (this matches the described format where option analysis is at the end)

    Returns (new_text, was_changed).  Text with no option sentences (or no
    text at all) is returned unchanged with was_changed == False.
    """
    text = explanation_text.strip()
    if not text:
        return text, False

    sentences = split_into_sentences(text)
    if not sentences:
        return text, False

    # Index of the first option-analysis sentence, or None if there is none.
    first_option_idx = next(
        (i for i, s in enumerate(sentences) if is_option_sentence(s, lang)),
        None,
    )
    if first_option_idx is None:
        return text, False

    main_paragraph = " ".join(
        s.strip() for s in sentences[:first_option_idx] if s.strip()
    )

    bullets = []
    for raw in sentences[first_option_idx:]:
        s = raw.strip()
        if not s:
            continue
        # Remove exactly one trailing period (keep ellipses "..." intact) so
        # the bolding rules see a clean sentence, then restore terminal
        # punctuation WITHOUT doubling it ("wrong!" must not become "wrong!.").
        core = s[:-1] if s.endswith(".") and not s.endswith("..") else s
        bolded = bold_option_reference(core)
        if bolded.endswith(_TERMINAL_PUNCT):
            bullets.append(f"- {bolded}")
        else:
            bullets.append(f"- {bolded}.")

    if not bullets:
        return text, False

    parts = []
    if main_paragraph:
        parts.append(main_paragraph)
        parts.append("")  # blank line separating paragraph from bullet list
    parts.extend(bullets)
    new_text = "\n".join(parts)

    # Only report a change if something actually changed.
    return new_text, new_text.strip() != text.strip()


def process_file(filepath, lang, explanation_header, key_terms_header):
    """
    Process a single markdown file.

    Scans for `explanation_header` lines, collects each explanation body up to
    the next section marker, and restructures it in memory.

    Returns (content, count_restructured); nothing is written to disk here.
    """
    content = filepath.read_text(encoding="utf-8")
    lines = content.split("\n")
    count = 0
    result_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        if line.strip() != explanation_header:
            result_lines.append(line)
            i += 1
            continue

        # Keep the header itself and any blank lines directly after it.
        result_lines.append(line)
        i += 1
        while i < len(lines) and lines[i].strip() == "":
            result_lines.append(lines[i])
            i += 1

        # Collect the explanation body until we hit:
        #   - the key terms header
        #   - the next question ("### Q")
        #   - any other "#### " header
        #   - end of file
        # (The key-terms check is technically subsumed by the "#### " check;
        # it is kept explicit for readability.)
        explanation_body_lines = []
        while i < len(lines):
            stripped = lines[i].strip()
            if (stripped == key_terms_header
                    or stripped.startswith("### Q")
                    or stripped.startswith("#### ")):
                break
            explanation_body_lines.append(lines[i])
            i += 1

        raw_body = "\n".join(explanation_body_lines).rstrip()
        new_body, changed = restructure_explanation(raw_body, lang)
        if changed:
            count += 1
            result_lines.append(new_body)
            result_lines.append("")  # trailing blank line before next section
        else:
            # Restore the original lines verbatim.
            result_lines.extend(explanation_body_lines)

    return "\n".join(result_lines), count


def main():
    """Restructure explanations in every per-question file of each language."""
    total_restructured = 0
    total_files = 0
    for lang, dir_path in SUBJECT_DIRS.items():
        explanation_header = EXPLANATION_HEADERS[lang]
        key_terms_header = KEY_TERMS_HEADERS[lang]
        for filepath in sorted(dir_path.glob("*.md")):
            # Skip the combined file.
            if "SPL Exam Questions" in filepath.name:
                continue
            new_content, count = process_file(
                filepath, lang, explanation_header, key_terms_header
            )
            if count > 0:
                filepath.write_text(new_content, encoding="utf-8")
                print(f" [{lang}] {filepath.name}: {count} explanation(s) restructured")
                total_restructured += count
            else:
                print(f" [{lang}] {filepath.name}: no changes")
            total_files += 1

    print(f"\nDone. {total_restructured} explanations restructured across {total_files} files.")


if __name__ == "__main__":
    main()