{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "psp-videoarchive/summary/v2",
  "title": "PSP Event Summary",
  "description": "AI-generated structured summary of a Czech Parliament (PSP) event transcript. Field naming follows the Popolo / Legislature Data Standard where applicable.",
  "type": "object",
  "required": ["schema_version", "event", "transcription", "summary", "entities", "highlights", "controversial", "quality"],
  "additionalProperties": false,
  "properties": {

    "schema_version": {
      "description": "Version of this schema. Increment when fields are added or types change.",
      "const": "2",
      "type": "string"
    },
    "created_at": {
      "description": "ISO-8601 timestamp of when the summary was generated. Set by the script, not the LLM. (Popolo: created_at)",
      "format": "date-time",
      "type": ["string", "null"]
    },
    "model_hint": {
      "description": "Model identifier used to generate the summary. Set by the script, not the LLM.",
      "type": ["string", "null"]
    },

    "event": {
      "type": "object",
      "description": "Metadata about the parliamentary event, copied from the transcript header.",
      "required": ["id", "name", "classification", "start_date"],
      "additionalProperties": false,
      "properties": {
        "id":             { "type": "string", "minLength": 1, "description": "PSP event ID. (Popolo: id)" },
        "name":           { "type": "string", "minLength": 1, "description": "Full event title. (Popolo: name)" },
        "classification": { "type": "string", "minLength": 1, "description": "Event category, e.g. Kulatý stůl, Seminář. (Popolo: classification)" },
        "start_date":     { "type": "string", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2})?$", "description": "ISO 8601: YYYY-MM-DD or YYYY-MM-DDTHH:MM. (Popolo: start_date)" },
        "end_date":       { "type": ["string", "null"], "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2})?$", "description": "ISO 8601: YYYY-MM-DD or YYYY-MM-DDTHH:MM; null if unknown. (Popolo: end_date)" },
        "sources":        {
          "type": "array",
          "items": { "type": "string" },
          "description": "URLs or local paths to source transcripts. (Popolo: sources)"
        },
        "video_parts": {
          "type": "array",
          "description": "Ordered list of source video files used for deep-linking. Injected by the script from metadata, not the LLM.",
          "items": {
            "type": "object",
            "required": ["part", "url"],
            "additionalProperties": false,
            "properties": {
              "part":     { "type": "integer", "minimum": 1, "description": "1-based part index matching transcript part numbers." },
              "url":      { "type": "string", "format": "uri", "description": "Full URL of the MP4 file on videoarchiv.psp.cz." },
              "from_sec": { "type": "integer", "minimum": 0, "description": "Seconds into the file where content begins (pre-roll offset). Omitted if zero." }
            }
          }
        }
      }
    },

    "transcription": {
      "type": "object",
      "description": "Technical metadata about the source transcript.",
      "required": ["parts_transcribed", "parts_total", "source"],
      "additionalProperties": false,
      "properties": {
        "parts_transcribed": {
          "type": "integer",
          "minimum": 0,
          "description": "Number of transcript parts that were transcribed. This schema does not check the value against parts_total."
        },
        "parts_total": {
          "type": "integer",
          "minimum": 1,
          "description": "Total number of transcript parts for the event."
        },
        "source": {
          "type": "string",
          "enum": ["whisper", "captions", "groq", "mixed"],
          "description": "Backend that produced the transcript."
        },
        "model": { "type": ["string", "null"], "description": "Transcription model name." }
      }
    },

    "summary": {
      "description": "Human-readable summary in Czech, stored as Markdown strings.",
      "type": "object",
      "additionalProperties": false,
      "required": ["topic", "main_points", "outcome", "notes"],
      "properties": {
        "topic": {
          "description": "Markdown — full text of the 'Téma a účel jednání' section.",
          "type": "string",
          "minLength": 1
        },
        "main_points": {
          "description": "Markdown — one item per speaker or thematic bullet from 'Hlavní body'.",
          "type": "array",
          "minItems": 1,
          "items": {
            "type": "string",
            "minLength": 1
          }
        },
        "outcome": {
          "description": "Markdown — full text of 'Výsledek / závěr'; null if indeterminate.",
          "type": ["string", "null"]
        },
        "notes": {
          "description": "Markdown — transcript quality notes; null if none.",
          "type": ["string", "null"]
        }
      }
    },

    "entities": {
      "type": "object",
      "description": "Named entities extracted from the transcript.",
      "required": ["speakers", "parties", "institutions"],
      "additionalProperties": false,
      "properties": {
        "speakers": {
          "type": "array",
          "description": "Speakers identified in the transcript. (Popolo: person)",
          "items": {
            "type": "object",
            "required": ["name"],
            "additionalProperties": false,
            "properties": {
              "name":        { "type": "string", "minLength": 1, "description": "Display name as it appears in the transcript. (Popolo: name)" },
              "person_id":   { "type": ["string", "null"], "description": "Popolo person.id if known; null otherwise." },
              "affiliation": { "type": ["string", "null"], "description": "Party or institution name as free text." }
            }
          }
        },
        "parties": {
          "type": "array",
          "items": { "type": "string", "minLength": 1 },
          "description": "Political parties mentioned. (Popolo: organization with classification=party)"
        },
        "institutions": {
          "type": "array",
          "items": { "type": "string", "minLength": 1 },
          "description": "Institutions and organisations mentioned. (Popolo: organization)"
        }
      }
    },

    "highlights": {
      "type": "array",
      "description": "Short key statements (1 sentence) suitable for social media. Prefer hard facts and specific numbers.",
      "items": {
        "type": "object",
        "required": ["text", "type", "speaker", "affiliation", "timestamp"],
        "additionalProperties": false,
        "properties": {
          "text":            { "type": "string", "minLength": 1, "description": "The statement — direct quote or paraphrase, ~1 sentence." },
          "type":            { "type": "string", "enum": ["citation", "paraphrase"], "description": "Whether text is a direct quote or a paraphrase." },
          "speaker":         { "type": ["string", "null"], "description": "Speaker name, or null if unattributed." },
          "affiliation":     { "type": ["string", "null"], "description": "Party or institution, or null if unknown." },
          "timestamp":       { "type": ["string", "null"], "pattern": "^[1-9][0-9]*/[0-9]{2,}:[0-5][0-9]$", "description": "Format: 'N/MM:SS' where N is the transcript part number and MM:SS is the nearest [MM:SS] marker in the text. Null if not determinable." },
          "screenshot_path": { "type": ["string", "null"], "description": "Path to extracted video frame. Set by the script, not the LLM." },
          "context":         { "type": ["string", "null"], "description": "Brief factual context or fact-check note added by a second LLM pass. Null if not yet generated." }
        }
      }
    },

    "controversial": {
      "type": "array",
      "description": "Controversial statements or proposals, each with attribution and reasoning.",
      "items": {
        "type": "object",
        "required": ["statement", "speaker", "affiliation", "timestamp"],
        "additionalProperties": false,
        "properties": {
          "statement":       { "type": "string", "minLength": 1, "description": "Markdown — what was said and why it is controversial." },
          "speaker":         { "type": ["string", "null"], "description": "Speaker name, or null if unattributed." },
          "affiliation":     { "type": ["string", "null"], "description": "Party or institution, or null if unknown." },
          "timestamp":       { "type": ["string", "null"], "pattern": "^[1-9][0-9]*/[0-9]{2,}:[0-5][0-9]$", "description": "Format: 'N/MM:SS' — part number and nearest [MM:SS] marker. Null if not determinable." },
          "screenshot_path": { "type": ["string", "null"], "description": "Path to extracted video frame. Set by the script, not the LLM." },
          "context":         { "type": ["string", "null"], "description": "Brief factual context or fact-check note added by a second LLM pass. Null if not yet generated." }
        }
      }
    },

    "quality": {
      "description": "Assessment of transcript quality affecting summary reliability.",
      "type": "object",
      "additionalProperties": false,
      "required": ["transcript_quality", "unintelligible_parts"],
      "properties": {
        "transcript_quality": {
          "description": "good = reliable; partial = some gaps; poor = significant corruption.",
          "type": "string",
          "enum": ["good", "partial", "poor"]
        },
        "unintelligible_parts": {
          "description": "True if sections were too corrupted to summarise.",
          "type": "boolean"
        }
      }
    },

    "extras": {
      "description": "Additional metadata not covered by the core schema. (Popolo: extras)",
      "type": ["object", "null"]
    }

  }
}
