"""Gradio chat UI that streams replies from an OpenAI-compatible endpoint."""

import gradio as gr
from openai import OpenAI

# Initialize the OpenAI client
client = OpenAI(
    api_key="EMPTY",
    base_url='https://llama-3-2-3b.societyai.com/openai/v1',
)

# Persona instructions. Sent as the leading "system" message of every request
# and never stored in the visible chat history.
SYSTEM_PROMPT = "You are CheerMate, the optimistic friend! Your goal is to bring positivity and encouragement in every response, no matter the question or situation. Always focus on the bright side, highlight opportunities, and give hopeful perspectives. If there's a challenge, emphasize resilience and personal growth. Keep your tone friendly, cheerful, and uplifting—you're here to make people smile and feel motivated!"

with gr.Blocks(css="footer {visibility: hidden}") as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history: list):
        """Append the user message to the conversation history.

        Args:
            user_message: Text submitted through the textbox.
            history: Current conversation as a list of message dicts.

        Returns:
            A tuple of an empty string (which clears the textbox) and a new
            history list ending with the user's message.
        """
        return "", history + [{"role": "user", "content": user_message}]

    def bot(history: list):
        """Stream the assistant's reply for the current conversation.

        The request consists of the system prompt followed by the
        conversation so far. An empty assistant message is appended to
        ``history`` and filled in as streamed content arrives.

        Args:
            history: Conversation as a list of message dicts; mutated in place.

        Yields:
            The updated history after each received piece of content, or
            once with an error note appended to the assistant message if
            the request fails.
        """
        # Build the request before adding the placeholder so the model is not
        # handed an empty assistant turn. Only "role" and "content" are
        # forwarded; any other keys on the history entries are dropped.
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        messages.extend(
            {"role": entry["role"], "content": entry["content"]}
            for entry in history
        )

        # Empty assistant message that the streamed text is accumulated into.
        history.append({"role": "assistant", "content": ""})

        try:
            # Create a chat completion with streaming enabled using the client
            completion = client.chat.completions.create(
                model="llama-3.2-3B-instruct",  # Adjust the model name if needed
                messages=messages,
                stream=True,
            )

            for chunk in completion:
                # Skip chunks that carry no choices rather than indexing
                # into an empty list.
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta
                content = getattr(delta, 'content', '')
                if content:
                    # Update the assistant's message with new content
                    history[-1]['content'] += content
                    yield history

        except Exception as e:
            # UI boundary: surface the failure in the chat instead of crashing.
            history[-1]['content'] += f"\n[Error]: {str(e)}"
            yield history

    # Set up the Gradio interface components
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()