# simple-chatbot-openai/app.py

import gradio as gr
from openai import OpenAI

# Shared OpenAI-compatible client used by the streaming reply handler below.
# NOTE(review): "EMPTY" looks like a placeholder key — presumably the hosted
# endpoint does not validate it; confirm before pointing this at another server.
client = OpenAI(
    base_url="https://hub.societyai.com/models/llama-3-2-3b/openai/v1",
    api_key="EMPTY",
)

# Build the UI inside a Blocks context; the custom CSS rule hides the page footer.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    # Conversation display. With type="messages" the history handled by the
    # callbacks below is a list of {"role": ..., "content": ...} dicts.
    chatbot = gr.Chatbot(type="messages")
    # Text input whose submit event drives the user -> bot callback chain.
    msg = gr.Textbox()
    # Button whose click handler resets the chatbot's value to None.
    clear = gr.Button("Clear")

    def user(user_message, history: list):
        """Record the submitted text as a new user turn.

        Returns a 2-tuple: an empty string (the submit wiring writes it back
        to the textbox, clearing it) and a new list made of the previous
        history followed by the user's message. ``history`` itself is not
        modified.
        """
        new_turn = {"role": "user", "content": user_message}
        extended_history = history + [new_turn]
        return "", extended_history

    def bot(history: list):
        """Stream the assistant's reply for the current conversation.

        An empty assistant message is appended to ``history`` (in place) and
        filled in as streamed chunks arrive. The full history is yielded after
        every non-empty piece of text so the chat display can be refreshed.

        Args:
            history: Conversation so far, as a list of
                ``{"role": ..., "content": ...}`` message dicts.

        Yields:
            The same ``history`` list, with its trailing assistant message
            extended by the text received so far. If the request or the
            stream fails, the error text is appended to that message and the
            history is yielded one final time.
        """
        # Snapshot the conversation BEFORE adding the placeholder: the request
        # must contain only the real turns, not a trailing empty assistant
        # message that exists purely for incremental display.
        messages = list(history)

        # Placeholder that is filled in as chunks arrive.
        history.append({"role": "assistant", "content": ""})

        try:
            completion = client.chat.completions.create(
                model="llama-3.2-3B-instruct",  # Adjust the model name if needed
                messages=messages,
                stream=True,
            )

            for chunk in completion:
                # Some chunks carry no choices (e.g. usage-only chunks);
                # indexing them would raise IndexError and surface as a
                # spurious "[Error]" in the middle of a valid reply.
                if not chunk.choices:
                    continue
                content = getattr(chunk.choices[0].delta, "content", None)
                if content:
                    history[-1]["content"] += content
                    yield history
        except Exception as e:
            # UI boundary: show the failure in the chat instead of letting the
            # callback crash.
            history[-1]["content"] += f"\n[Error]: {e}"
            yield history

    # Event wiring. Submitting the textbox first records the user's turn
    # (unqueued) and clears the input, then runs the streaming reply handler
    # with the chatbot value as both input and output.
    msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).then(fn=bot, inputs=chatbot, outputs=chatbot)
    # The Clear button writes None to the chatbot component.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()