import gradio as gr
from openai import OpenAI

# OpenAI-compatible client pointed at the SocietyAI vLLM endpoint.
# NOTE(review): no api_key is passed, so the SDK falls back to the
# OPENAI_API_KEY environment variable — confirm the hub accepts it.
client = OpenAI(
    base_url='https://hub.societyai.com/models/llama-3-2-3b/openai/v1',
)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def chat(user_message, history: list):
        """
        Stream a chat completion into the Gradio chatbot.

        1) Appends the user message to the conversation history
        2) Sends the conversation history to the vLLM API
        3) Streams back the assistant's response

        Yields:
            ("", history) pairs — the empty string clears the textbox,
            and `history` grows token-by-token as chunks arrive.
        """
        # 1) Append the user message. `history + [...]` copies the list so
        #    Gradio's component state is not mutated in place mid-stream.
        history = history + [{"role": "user", "content": user_message}]

        # 2) Prepare an empty assistant entry that streaming will fill in.
        history.append({"role": "assistant", "content": ""})

        # 3) Stream the assistant's response.
        try:
            completion = client.chat.completions.create(
                model="llama-3.2-3B-instruct",
                messages=history,
                stream=True,
            )
            for chunk in completion:
                delta = chunk.choices[0].delta
                # delta.content can be None on role/stop chunks even when the
                # attribute exists, so `or ''` normalizes it alongside getattr.
                content = getattr(delta, 'content', '') or ''
                if content:
                    history[-1]['content'] += content
                # Yield both the cleared textbox ("") and updated history.
                yield "", history
        except Exception as e:
            # Surface the failure inside the chat instead of crashing the UI;
            # this is a top-level UI boundary, so the broad catch is deliberate.
            history[-1]['content'] += f"\n[Error]: {str(e)}"
            yield "", history

    # Wire up the single chat function to the UI.
    msg.submit(
        fn=chat,                 # streaming generator defined above
        inputs=[msg, chatbot],   # pass user message and current chatbot history
        outputs=[msg, chatbot],  # clear the message box and update the chatbot
    )

    # Reset the conversation. An empty list (not None) is the correct empty
    # value for a type="messages" Chatbot history.
    clear.click(lambda: [], None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()