From 4c63f13b8f2065098df8d2a61396656846c07e79 Mon Sep 17 00:00:00 2001
From: Hezi Aharon
Date: Mon, 30 Dec 2024 12:37:29 +0000
Subject: [PATCH] Update app.py

---
 app.py | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/app.py b/app.py
index d0be26d..168d0d6 100644
--- a/app.py
+++ b/app.py
@@ -11,41 +11,46 @@ with gr.Blocks() as demo:
     msg = gr.Textbox()
     clear = gr.Button("Clear")
 
-    def user(user_message, history: list):
-        """Appends the user message to the conversation history."""
-        return "", history + [{"role": "user", "content": user_message}]
+    def chat(user_message, history: list):
+        """
+        Consolidated function that:
+          1) Appends the user message to the conversation history
+          2) Sends the conversation history to the vLLM API
+          3) Streams back the assistant's response
+        """
 
-    def bot(history: list):
-        """Sends the conversation history to the vLLM API and streams the assistant's response."""
-        # Append an empty assistant message to history to fill in as we receive the response
+        # 1) Append the user message
+        history = history + [{"role": "user", "content": user_message}]
+
+        # 2) Prepare an empty assistant entry for streaming
         history.append({"role": "assistant", "content": ""})
 
+        # 3) Stream the assistant's response
         try:
-            # Create a chat completion with streaming enabled using the client
             completion = client.chat.completions.create(
-                model="llama-3.2-3B-instruct",  # Adjust the model name if needed
+                model="llama-3.2-3B-instruct",
                 messages=history,
                 stream=True
             )
-
-            # Iterate over the streamed response
             for chunk in completion:
-                # Access the delta content from the chunk
                 delta = chunk.choices[0].delta
                 content = getattr(delta, 'content', '')
                 if content:
-                    # Update the assistant's message with new content
                     history[-1]['content'] += content
-                    yield history
+                    # Yield both the cleared textbox ("") and updated history
+                    yield "", history
         except Exception as e:
             # Handle exceptions and display an error message
             history[-1]['content'] += f"\n[Error]: {str(e)}"
-            yield history
+            yield "", history
 
-    # Set up the Gradio interface components
-    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, chatbot, chatbot
+    # Wire up the single chat function to the UI
+    msg.submit(
+        fn=chat,                # single consolidated function
+        inputs=[msg, chatbot],  # pass user message and current chatbot history
+        outputs=[msg, chatbot]  # clear the message box and update the chatbot
     )
+
     clear.click(lambda: None, None, chatbot, queue=False)
 
 if __name__ == "__main__":
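
For context, below is a minimal sketch of the complete app.py this patch produces. Only the hunk above comes from the commit itself; the imports, the OpenAI-compatible client pointed at a local vLLM server (the base_url and api_key values), the Chatbot's type="messages" setting, and the launch call are assumptions filled in for illustration, since those lines fall outside the hunk.

import gradio as gr
from openai import OpenAI

# Assumption: vLLM serving an OpenAI-compatible API locally; URL and key are illustrative.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")  # role/content dicts, matching the history format
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def chat(user_message, history: list):
        # Consolidated handler from the patch: append the user turn,
        # stream the assistant's reply, and keep the textbox cleared.
        history = history + [{"role": "user", "content": user_message}]
        history.append({"role": "assistant", "content": ""})
        try:
            completion = client.chat.completions.create(
                model="llama-3.2-3B-instruct",
                messages=history,
                stream=True
            )
            for chunk in completion:
                delta = chunk.choices[0].delta
                content = getattr(delta, 'content', '')
                if content:
                    history[-1]['content'] += content
                    # Yield the cleared textbox ("") together with the updated history
                    yield "", history
        except Exception as e:
            history[-1]['content'] += f"\n[Error]: {str(e)}"
            yield "", history

    msg.submit(fn=chat, inputs=[msg, chatbot], outputs=[msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()

Yielding the pair ("", history) on every chunk is what lets one handler replace the old user/bot pair: Gradio maps each yielded tuple positionally onto outputs=[msg, chatbot], so the textbox is cleared on the first yield and the chatbot updates as tokens arrive. Dropping queue=False from the submit handler is also deliberate, since generator-based streaming requires Gradio's queue.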