#!/usr/bin/env python3
"""
Fix explanation formatting in SPL exam question files.

Converts parenthetical option references like "(A)" in prose sentences
into bullet points with bolded option references like "**(A)**".

Pattern:
  "Some intro. La construction métallique (A) utilise des feuilles. La construction (B) utilise..."

Becomes:
  "Some intro.
  - La construction métallique **(A)** utilise des feuilles.
  - La construction **(B)** utilise..."

The conversion is idempotent: running the script again on an already
converted file leaves it unchanged.

Usage:
  script.py [BASE_DIR]    (BASE_DIR defaults to the module constant)
"""

import glob
import os
import re
import sys

BASE_DIR = "/Users/i052341/Daten/Cloud/04 - Ablage/Ablage 2020 - 2029/Ablage 2025/Hobbies 2025/Segelflug/Theorie/Glidr"

# Pattern to detect option references (A), (B), (C), (D)
OPTION_REF_PATTERN = re.compile(r'\([ABCD]\)')

# Option reference that is not already directly wrapped in "**"; the
# look-arounds keep repeated runs from producing "****(A)****".
_UNBOLDED_OPTION_REF = re.compile(r'(?<!\*\*)\(([ABCD])\)(?!\*\*)')

# Sentence boundary: a period preceded by a word character, a closing
# parenthesis or bold marker (so "... (A). Next" and "... **(A)**. Next"
# split too), followed by whitespace and an uppercase letter.
_SENTENCE_BOUNDARY = re.compile(
    r'(?<=[\w)*])\.\s+(?=[A-ZÀÂÄÈÉÊËÎÏÔÖÙÛÜÇ])'
)

# Punctuation after which no extra period must be appended.
_TERMINAL_PUNCTUATION = ('.', '!', '?', ':', ';', '…')

# Line prefixes (after stripping) that are never merged into a paragraph.
_STRUCTURAL_PREFIXES = ('- ', '* ', '#')

_EXPLANATION_HEADER = re.compile(
    r'^#### (Explanation|Erklärung|Explication)\s*$'
)

# Any heading of level 1-4 terminates an explanation section.
_SECTION_HEADER = re.compile(r'^#{1,4} ')

_LANGUAGE_FOLDERS = (
    "SPL Exam Questions EN",
    "SPL Exam Questions DE",
    "SPL Exam Questions FR",
)


def bold_option_refs(text):
    """Replace (A) with **(A)** in text.

    References that are already adjacent to a ``**`` marker are left
    alone, so applying the function twice gives the same result as
    applying it once.
    """
    return _UNBOLDED_OPTION_REF.sub(r'**(\1)**', text)


def sentence_contains_option(sentence):
    """Check if a sentence contains a parenthetical option reference."""
    return OPTION_REF_PATTERN.search(sentence) is not None


def split_into_sentences(text):
    """
    Split text into sentences at '. ' boundaries where the next sentence
    starts with an uppercase letter (including accented chars).

    The separating period and whitespace are consumed, so every returned
    part except possibly the last has no trailing period.
    """
    return _SENTENCE_BOUNDARY.split(text)


def _ensure_terminated(sentence):
    """Append a period unless the sentence already ends with punctuation."""
    if sentence.endswith(_TERMINAL_PUNCTUATION):
        return sentence
    return sentence + '.'


def join_sentences(sentences):
    """Join sentences back into a paragraph, adding periods where needed.

    Blank entries are dropped. A period is only added when the sentence
    does not already end with terminal punctuation, so "Why?" does not
    become "Why?.".
    """
    cleaned = (s.strip() for s in sentences)
    return ' '.join(_ensure_terminated(s) for s in cleaned if s)


def process_explanation_text(text):
    """
    Process a block of explanation text (one paragraph / multiple sentences).

    If the text contains option references in multiple sentences, those
    sentences (and any sentences between them) become bullets; sentences
    before and after stay as regular text. With option references in at
    most one sentence, the references are only bolded inline.

    Returns the processed text as a string (may contain newlines for
    bullets). Text without option references, or text that is already a
    bullet, is returned unchanged.
    """
    stripped = text.strip()

    # Already a bullet - leave it alone
    if stripped.startswith(('- ', '* ')):
        return text

    # No option references - leave it alone
    if not OPTION_REF_PATTERN.search(text):
        return text

    sentences = split_into_sentences(stripped)
    option_sentence_indices = [
        i for i, s in enumerate(sentences) if sentence_contains_option(s)
    ]

    # Zero or one sentence with option refs - just bold them inline
    if len(option_sentence_indices) <= 1:
        return bold_option_refs(text)

    # Multiple sentences have option refs - convert them to bullets
    first_opt_idx = option_sentence_indices[0]
    last_opt_idx = option_sentence_indices[-1]

    intro_sentences = sentences[:first_opt_idx]
    middle_sentences = sentences[first_opt_idx:last_opt_idx + 1]
    outro_sentences = sentences[last_opt_idx + 1:]

    output_lines = []

    # Intro as regular text
    if intro_sentences:
        output_lines.append(join_sentences(intro_sentences))

    # Middle sentences (option-containing and any in between) as bullets
    for s in middle_sentences:
        bolded = bold_option_refs(s.strip())
        output_lines.append('- ' + _ensure_terminated(bolded))

    # Outro as regular text
    if outro_sentences:
        output_lines.append(join_sentences(outro_sentences))

    return '\n'.join(output_lines)


def _is_structural_line(line):
    """Return True for blank lines, bullet lines and header lines."""
    stripped = line.strip()
    return not stripped or stripped.startswith(_STRUCTURAL_PREFIXES)


def process_explanation_block(lines):
    """
    Process a block of lines from an explanation section.

    Blank lines, bullets and headers are kept as they are. Consecutive
    regular text lines are grouped into a paragraph and passed to
    process_explanation_text(). A paragraph that needs no conversion is
    emitted with its original lines untouched, so wrapping and
    indentation survive and unchanged sections compare equal.

    Returns a new list of lines.
    """
    result = []
    i = 0
    total = len(lines)

    while i < total:
        if _is_structural_line(lines[i]):
            result.append(lines[i])
            i += 1
            continue

        # Regular text - collect the whole paragraph
        start = i
        while i < total and not _is_structural_line(lines[i]):
            i += 1
        para_lines = lines[start:i]

        para_text = ' '.join(part.strip() for part in para_lines)
        processed = process_explanation_text(para_text)

        if processed == para_text:
            # Nothing to convert: do not re-wrap the paragraph
            result.extend(para_lines)
        else:
            # Processed text may be multiple lines due to bullets
            result.extend(processed.split('\n'))

    return result


def _convert_explanations(content):
    """Convert all explanation sections in a markdown document.

    Returns a tuple (new_content, number_of_changed_sections).
    """
    lines = content.split('\n')
    result_lines = []
    changes_made = 0
    i = 0

    while i < len(lines):
        line = lines[i]
        result_lines.append(line)
        i += 1

        if not _EXPLANATION_HEADER.match(line.strip()):
            continue

        # Collect the section body up to the next heading of level 1-4
        start = i
        while i < len(lines) and not _SECTION_HEADER.match(lines[i]):
            i += 1
        explanation_lines = lines[start:i]

        processed = process_explanation_block(explanation_lines)
        if processed != explanation_lines:
            changes_made += 1
        result_lines.extend(processed)

    return '\n'.join(result_lines), changes_made


def process_file(filepath):
    """Process a single markdown file, fixing explanation formatting.

    The file is read as UTF-8 and rewritten only when its content
    changed. Returns the number of explanation sections that changed.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    new_content, changes_made = _convert_explanations(content)

    if new_content != content:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(new_content)

    return changes_made


def main(base_dir=BASE_DIR):
    """Process all SPL exam question files below base_dir.

    base_dir is the directory containing the per-language folders; it
    defaults to BASE_DIR. One status line is printed per file, followed
    by a summary.
    """
    patterns = [
        os.path.join(base_dir, folder, "*.md") for folder in _LANGUAGE_FOLDERS
    ]

    total_files = 0
    total_changes = 0

    for pattern in patterns:
        for filepath in sorted(glob.glob(pattern)):
            filename = os.path.basename(filepath)
            # Skip combined index files
            if filename.startswith("SPL Exam Questions"):
                continue

            changes = process_file(filepath)
            total_files += 1
            total_changes += changes

            lang_folder = os.path.basename(os.path.dirname(filepath))
            status = f" {changes} explanations converted" if changes > 0 else " (no changes)"
            print(f"[{lang_folder}] {filename}{status}")

    print(f"\nTotal: {total_files} files processed, {total_changes} explanations converted to bullets")


if __name__ == "__main__":
    # Optional first argument overrides the default base directory.
    main(sys.argv[1] if len(sys.argv) > 1 else BASE_DIR)