APPS/glidr-content.git - git.mnsoft.org

#!/usr/bin/env python3
"""
Fix explanation formatting in SPL exam question files.

Converts parenthetical option references like "(A)" in prose sentences
into bullet points with bolded option references like "**(A)**".

Pattern:
  "Some intro. La construction métallique (A) utilise des feuilles. La construction (B) utilise..."
Becomes:
  "Some intro.
  - La construction métallique **(A)** utilise des feuilles.
  - La construction **(B)** utilise..."
"""

import re
import os
import glob

# Root directory that main() joins with the per-language
# "SPL Exam Questions ..." folder names to locate the markdown files.
# NOTE(review): this is an absolute, machine-specific path; on any other
# machine the glob patterns built from it will simply match nothing.
BASE_DIR = "/Users/i052341/Daten/Cloud/04 - Ablage/Ablage 2020 - 2029/Ablage 2025/Hobbies 2025/Segelflug/Theorie/Glidr"

# Matches a parenthesised answer-option reference: (A), (B), (C) or (D).
# Only the four uppercase letters are recognised.
OPTION_REF_PATTERN = re.compile(r'\([ABCD]\)')


def bold_option_refs(text):
    """Wrap option references such as ``(A)`` in Markdown bold markers.

    A reference that already touches a ``**`` marker on either side is left
    untouched, so the function is idempotent: running the script again on an
    already converted file does not produce ``****(A)****``.

    Args:
        text: Arbitrary explanation text.

    Returns:
        The text with every not-yet-bold ``(A)``..``(D)`` replaced by
        ``**(A)**``..``**(D)**``.
    """
    # The lookbehind/lookahead skip references that are already inside bold
    # markup, e.g. "**(A)**" or "**options (A) and (B)**".
    return re.sub(r'(?<!\*\*)\(([ABCD])\)(?!\*\*)', r'**(\1)**', text)


def sentence_contains_option(sentence):
    """Return True when the sentence mentions an option such as ``(A)``."""
    found = OPTION_REF_PATTERN.search(sentence)
    return found is not None


def split_into_sentences(text):
    """Split text into sentences at '. ' boundaries.

    A boundary is a period that follows a word character or a closing
    parenthesis, is followed by whitespace, and whose next visible character
    is an uppercase letter. Uppercase is decided with ``str.isupper`` so that
    any capital letter (e.g. ``Ö`` or ``Œ``) starts a new sentence, not just
    a hand-picked list of accented characters. Allowing ``)`` before the
    period lets sentences that end in an option reference ("... is (A).")
    be separated from the following one.

    The separating period and whitespace are removed from the returned
    pieces; the final piece keeps whatever trailing punctuation it had.
    Abbreviations followed by a capitalised word (``e.g. The``) are still
    treated as boundaries.

    Args:
        text: Paragraph text to split.

    Returns:
        List of sentence strings; a single-element list when no boundary
        is found (including for the empty string).
    """
    boundary = re.compile(r'(?<=[\w)])\.\s+(?=\S)')

    sentences = []
    start = 0
    for match in boundary.finditer(text):
        # (?=\S) guarantees there is a character at match.end().
        if text[match.end()].isupper():
            sentences.append(text[start:match.start()])
            start = match.end()
    sentences.append(text[start:])
    return sentences


def join_sentences(sentences):
    """Join sentences back into a single paragraph.

    Each sentence is stripped; blank entries are dropped. A period is
    appended only when the sentence does not already end in terminal
    punctuation, so "Why?" stays "Why?" instead of becoming "Why?.".

    Args:
        sentences: Iterable of sentence strings, with or without their
            trailing punctuation.

    Returns:
        The non-empty sentences separated by single spaces, or an empty
        string when there are none.
    """
    terminators = ('.', '!', '?', ':', '…')

    cleaned = []
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue
        if not sentence.endswith(terminators):
            sentence += '.'
        cleaned.append(sentence)
    return ' '.join(cleaned)


def process_explanation_text(text):
    """Reformat one explanation paragraph that mentions answer options.

    Behaviour, in order:

    * Text that already starts with a bullet marker, or that contains no
      option reference, is returned unchanged.
    * If at most one sentence mentions an option, the references are only
      bolded inline and the paragraph stays a paragraph.
    * Otherwise the sentences from the first to the last option-mentioning
      sentence (including any sentences in between) become ``- `` bullets
      with bolded references. Sentences before that range are emitted as an
      intro paragraph, sentences after it as an outro paragraph.

    Bullets keep terminal punctuation they already have (``?``, ``!``,
    ``:``, ``…`` or one or more periods); a period is added only when a
    sentence has none, so a question does not end in "?.".

    Args:
        text: The paragraph as a single string.

    Returns:
        The processed text; it contains newlines when bullets were created.
    """
    stripped = text.strip()

    # Already a bullet - leave it alone.
    if stripped.startswith(('- ', '* ')):
        return text

    # No option references - leave it alone.
    if not OPTION_REF_PATTERN.search(text):
        return text

    sentences = split_into_sentences(stripped)
    option_indices = [
        index for index, sentence in enumerate(sentences)
        if sentence_contains_option(sentence)
    ]

    # A single sentence, or a single sentence with references: nothing to
    # itemise, so only bold the references inline.
    if len(sentences) <= 1 or len(option_indices) <= 1:
        return bold_option_refs(text)

    first_opt_idx = option_indices[0]
    last_opt_idx = option_indices[-1]

    intro_sentences = sentences[:first_opt_idx]
    middle_sentences = sentences[first_opt_idx:last_opt_idx + 1]
    outro_sentences = sentences[last_opt_idx + 1:]

    terminators = ('.', '!', '?', ':', '…')
    output_lines = []

    # Intro as regular text.
    if intro_sentences:
        output_lines.append(join_sentences(intro_sentences))

    # Option-containing sentences (and any in between) as bullets.
    for sentence in middle_sentences:
        bullet = bold_option_refs(sentence.strip())
        # The split removed the separating period; restore one unless the
        # sentence already carries its own terminal punctuation.
        if not bullet.endswith(terminators):
            bullet += '.'
        output_lines.append(f'- {bullet}')

    # Outro as regular text.
    if outro_sentences:
        output_lines.append(join_sentences(outro_sentences))

    return '\n'.join(output_lines)


def process_explanation_block(lines):
    """Process the lines that make up one explanation section.

    Blank lines, existing bullet lines (``- `` / ``* ``) and lines starting
    with ``#`` are passed through untouched. Every run of other consecutive
    lines is treated as one paragraph: its lines are stripped, joined with
    single spaces and handed to ``process_explanation_text``.

    A paragraph that comes back unchanged is emitted exactly as it was in
    the input. Only paragraphs that were actually reformatted are replaced,
    so untouched text keeps its original line breaks and indentation and is
    not reported as a change by callers comparing input and output.

    Args:
        lines: List of lines (without trailing newlines) of the section.

    Returns:
        A new list of lines with reformatted paragraphs substituted.
    """
    def is_passthrough(line):
        # Blank lines, bullets and '#' lines delimit paragraphs and are
        # never rewritten.
        stripped = line.strip()
        return not stripped or stripped.startswith(('- ', '* ', '#'))

    result = []
    total = len(lines)
    i = 0

    while i < total:
        if is_passthrough(lines[i]):
            result.append(lines[i])
            i += 1
            continue

        # Collect the paragraph: at least lines[i], up to the next delimiter.
        start = i
        while i < total and not is_passthrough(lines[i]):
            i += 1
        para_lines = lines[start:i]

        para_text = ' '.join(part.strip() for part in para_lines)
        processed = process_explanation_text(para_text)

        if processed == para_text:
            # Nothing to reformat - keep the author's original layout.
            result.extend(para_lines)
        else:
            # Processed text may span several lines because of bullets.
            result.extend(processed.split('\n'))

    return result


def process_file(filepath):
    """Fix the explanation formatting of a single markdown file in place.

    Every section introduced by a ``#### Explanation`` / ``#### Erklärung`` /
    ``#### Explication`` heading is passed to ``process_explanation_block``.
    A section ends at the next heading of level 1 to 4, i.e. any heading at
    the same or a higher level than the explanation heading itself; deeper
    headings (``#####`` and below) stay part of the section. The file is
    rewritten only when its content actually changed.

    Args:
        filepath: Path of the UTF-8 encoded markdown file.

    Returns:
        Number of explanation sections whose lines were modified.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    explanation_header = re.compile(r'^#### (Explanation|Erklärung|Explication)\s*$')
    # Any heading up to level 4 closes the explanation section, so a
    # following "# " or "## " heading is not swallowed into the explanation.
    section_end = re.compile(r'^#{1,4} ')

    lines = content.split('\n')
    result_lines = []
    changes_made = 0
    i = 0

    while i < len(lines):
        line = lines[i]

        if not explanation_header.match(line.strip()):
            result_lines.append(line)
            i += 1
            continue

        # Keep the explanation heading itself.
        result_lines.append(line)
        i += 1

        # Collect the section body up to (not including) the next heading.
        explanation_lines = []
        while i < len(lines) and not section_end.match(lines[i]):
            explanation_lines.append(lines[i])
            i += 1

        processed = process_explanation_block(explanation_lines)

        if explanation_lines != processed:
            changes_made += 1

        result_lines.extend(processed)

    new_content = '\n'.join(result_lines)

    # Avoid touching the file when nothing changed.
    if new_content != content:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(new_content)

    return changes_made


def main():
    """Run the formatter over every per-language SPL question file.

    Looks for ``*.md`` files in the EN, DE and FR question folders below
    ``BASE_DIR``, skips the combined index files, processes each remaining
    file and prints one status line per file followed by a total.
    """
    language_folders = (
        "SPL Exam Questions EN",
        "SPL Exam Questions DE",
        "SPL Exam Questions FR",
    )

    file_count = 0
    change_count = 0

    for folder in language_folders:
        matches = glob.glob(os.path.join(BASE_DIR, folder, "*.md"))
        for path in sorted(matches):
            parent_dir, name = os.path.split(path)

            # Combined index files share the folder's name prefix; skip them.
            if name.startswith("SPL Exam Questions"):
                continue

            converted = process_file(path)
            file_count += 1
            change_count += converted

            if converted > 0:
                suffix = f"  {converted} explanations converted"
            else:
                suffix = "  (no changes)"
            print(f"[{os.path.basename(parent_dir)}] {name}{suffix}")

    print(f"\nTotal: {file_count} files processed, {change_count} explanations converted to bullets")


if __name__ == "__main__":
    main()