import io
import os
import re
import zipfile

import streamlit as st
from openai import OpenAI

import doc_loader  # FIX 1: Import the new loader module (PDF/DOCX/PPTX/etc. text extraction)

# --- CONFIGURATION ---
st.set_page_config(page_title="Context Flattener", page_icon="📄", layout="centered")


# --- CORE LOGIC: THE OUTLINE PARSER ---
class OutlineProcessor:
    """
    Parses a raw text file into (context, target) pairs.

    Includes a pre-processing step to stitch multi-line items together,
    then uses leading-whitespace depth to reconstruct the outline hierarchy.
    """

    # Matches "1.", "a.", "A.", "-" or "*" list markers.
    # Compiled once at class level instead of on every line.
    _LIST_ITEM_RE = re.compile(r"^\s*(\d+\.|[a-zA-Z]\.|-|\*)\s+")

    def __init__(self, file_content):
        self.raw_lines = file_content.split('\n')

    def _is_list_item(self, line):
        """Return True when *line* starts with a recognised list marker."""
        return bool(self._LIST_ITEM_RE.match(line))

    def _merge_multiline_items(self):
        """Join wrapped continuation lines onto their preceding list item."""
        merged_lines = []
        for line in self.raw_lines:
            stripped = line.strip()
            if not stripped:
                continue  # drop blank lines entirely
            if not merged_lines:
                merged_lines.append(line)
                continue
            if self._is_list_item(line):
                merged_lines.append(line)
            else:
                # Not a new item: it is the continuation of the previous one.
                merged_lines[-1] = merged_lines[-1].rstrip() + " " + stripped
        return merged_lines

    def parse(self):
        """
        Return a list of dicts with 'context', 'target' and 'full_path' keys.

        'context' is the " > "-joined chain of ancestor items, or "ROOT"
        for top-level items. Hierarchy depth is inferred from indentation.
        """
        clean_lines = self._merge_multiline_items()
        stack = []    # ancestors of the current line: [{'indent': int, 'text': str}]
        results = []
        for line in clean_lines:
            stripped = line.strip()
            indent = len(line) - len(line.lstrip())
            # Pop siblings and deeper entries until the stack top is an ancestor.
            while stack and stack[-1]['indent'] >= indent:
                stack.pop()
            stack.append({'indent': indent, 'text': stripped})
            if len(stack) > 1:
                context_str = " > ".join(item['text'] for item in stack[:-1])
            else:
                context_str = "ROOT"
            results.append({
                "context": context_str,
                "target": stripped,
                "full_path": " > ".join(item['text'] for item in stack),
            })
        return results


# --- CORE LOGIC: THE LLM WRITER ---
def flatten_sentence(context, target, api_key, model="gpt-4o"):
    """
    Rewrite *target* into a single self-contained sentence using *context*.

    Returns the rewritten sentence, or a bracketed error string when the key
    is missing or the API call fails — callers embed the result directly in
    the output file, so this function never raises.
    """
    if not api_key:
        return "[ERROR: No API Key]"
    client = OpenAI(api_key=api_key)
    prompt = (
        "You are a Technical Data Specialist. "
        "Your task is to rewrite the 'Target' text into a single, self-contained sentence "
        "that explicitly incorporates information from the 'Context'.\n"
        "Rules:\n"
        "1. Do not use pronouns like 'it', 'this', or 'they'—use specific nouns from the context.\n"
        "2. The output must stand alone without needing the previous sentences.\n"
        "3. Do not add new facts, just merge the context.\n\n"
        f"### CONTEXT: {context}\n"
        f"### TARGET: {target}\n\n"
        "### FLATTENED STATEMENT:"
    )
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.0,   # deterministic rewrites
            max_tokens=300,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Surface API failures inline rather than aborting the whole batch.
        return f"[Error: {e}]"


# --- STREAMLIT UI ---
st.title("📄 Batch Context Flattener")
st.caption("Convert nested outlines (PDF, Word, PPT, Text) into RAG-optimized flat statements.")

# 1. Credentials & Settings
with st.sidebar:
    st.header("Settings")
    # FIX: fall back to a manual password input so the "enter an API Key in
    # the sidebar" error shown later is actually actionable when the
    # OPENAI_API_KEY environment variable is not set.
    api_key = os.getenv("OPENAI_API_KEY") or st.text_input("OpenAI API Key", type="password")
    model_choice = st.selectbox("Model", ["gpt-4o", "gpt-4-turbo", "gpt-3.5-turbo"])
    st.divider()
    st.subheader("Ingestion Mode")
    use_vision = st.toggle(
        "Enable Vision Mode",
        value=False,
        help="Uses GPT-4o to 'read' slides as images. Slower/Costlier, but captures tables and diagrams.",
    )

# 2. Upload
uploaded_files = st.file_uploader(
    "Upload Documents",
    type=["txt", "md", "pdf", "docx", "pptx", "xlsx", "csv"],
    accept_multiple_files=True,
)

if uploaded_files:
    total_files = len(uploaded_files)
    st.write(f"**Queued {total_files} files for processing.**")

    # 3. Preview Logic (updates based on the Vision toggle)
    first_file = uploaded_files[0]
    try:
        if use_vision and not api_key:
            st.warning("⚠️ Vision Mode selected but no API Key provided. Preview will fail.")
        else:
            # FIX: all preview processing lives inside this branch so we never
            # touch preview_text when extraction was skipped (NameError before).
            with st.spinner(f"Extracting Preview ({'Vision Mode' if use_vision else 'Fast Mode'})..."):
                # FIX: Pass the vision flag and api key here
                preview_text = doc_loader.extract_text_from_file(
                    first_file, use_vision=use_vision, api_key=api_key
                )
            # Reset pointer so the batch run re-reads the file from the start.
            first_file.seek(0)
            st.info(f"Preview (from {first_file.name}): Extracted {len(preview_text)} characters.")
            preview_items = OutlineProcessor(preview_text).parse()
            with st.expander("👁️ Preview Logic"):
                # Safety check for empty parse results
                if not preview_items:
                    st.warning("No structured items found. Vision mode output might need different formatting.")
                    st.text(preview_text[:500])  # Show raw text for debugging
                else:
                    for item in preview_items[:3]:
                        st.code(f"{item['context']} -> {item['target']}")
    except Exception as e:
        st.error(f"Preview failed: {e}")

    # 4. Process Batch Action
    if st.button(f"🚀 Flatten All {total_files} Files", type="primary"):
        if not api_key:
            st.error("Please enter an API Key in the sidebar.")
            st.stop()

        progress_bar = st.progress(0)
        status_text = st.empty()
        zip_buffer = io.BytesIO()

        with zipfile.ZipFile(zip_buffer, "w") as zf:
            for file_idx, uploaded_file in enumerate(uploaded_files):
                file_name = uploaded_file.name
                status_text.text(f"Processing {file_idx + 1}/{total_files}: {file_name}...")
                try:
                    # FIX: Pass vision/api_key here too
                    string_data = doc_loader.extract_text_from_file(
                        uploaded_file, use_vision=use_vision, api_key=api_key
                    )
                    # Parse
                    parsed_items = OutlineProcessor(string_data).parse()

                    # Flatten
                    file_output = []
                    if not parsed_items:
                        # Fallback if parsing fails: just flatten the whole chunk
                        file_output.append("[WARNING: No outline structure detected. Content below:]")
                        file_output.append(string_data)
                    else:
                        for item_idx, item in enumerate(parsed_items):
                            flat_text = flatten_sentence(item['context'], item['target'], api_key, model_choice)
                            file_output.append(flat_text)
                            # Granular progress bar: item fraction within file fraction.
                            total_progress = (file_idx + (item_idx / len(parsed_items))) / total_files
                            progress_bar.progress(min(total_progress, 1.0))

                    base_name = os.path.splitext(file_name)[0]
                    zf.writestr(f"{base_name}_flattened.txt", "\n".join(file_output))
                except Exception as e:
                    st.error(f"Error processing {file_name}: {e}")

        progress_bar.progress(1.0)
        status_text.text("✅ Batch Processing Complete!")
        st.divider()
        st.download_button(
            label="Download All Files (ZIP)",
            data=zip_buffer.getvalue(),
            file_name="flattened_batch.zip",
            mime="application/zip",
        )