diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d2e8eaa
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,23 @@
+# Single-stage image: install dependencies and run the app
+FROM python:3.10-bookworm
+
+# Set the working directory
+WORKDIR /usr/src/app
+
+RUN pip install uv==0.4.28
+# Copy the requirements file first so the dependency layer can be cached
+COPY requirements.txt .
+
+# Install the dependencies with uv into the image's Python interpreter
+RUN export PYTHON=$(which python) && \
+    uv pip install -r ./requirements.txt --python $PYTHON
+
+# Copy the application code
+COPY . .
+
+# Expose the app port and bind Gradio to all interfaces
+EXPOSE 7860
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+
+# Run the application
+CMD ["python", "app.py"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 739036e..9daeafb 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1 @@
-# simple-chatbot
-
-A (very) simple, text only chatbot using Society AI inference endpoint
\ No newline at end of file
+test
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..19d272b
--- /dev/null
+++ b/app.py
@@ -0,0 +1,59 @@
+import gradio as gr
+from openai import OpenAI
+
+# Initialize the OpenAI client
+client = OpenAI(
+    api_key="EMPTY",
+    base_url='https://llama-3-2-3b.societyai.com/openai/v1',
+)
+
+# If your endpoint requires an API key, uncomment and set it here
+# client.api_key = 'your-api-key'
+
+# Optionally, disable SSL verification if necessary (not recommended for production)
+# client.verify_ssl_certs = False
+
+with gr.Blocks(css="footer {visibility: hidden}") as demo:
+    chatbot = gr.Chatbot(type="messages")
+    msg = gr.Textbox()
+    clear = gr.Button("Clear")
+
+    def user(user_message, history: list):
+        """Appends the user message to the conversation history."""
+        return "", history + [{"role": "user", "content": user_message}]
+
+    def bot(history: list):
+        """Sends the conversation history to the vLLM API and streams the assistant's response."""
+        # Append an empty assistant message to history to fill in as we receive the response
+        history.append({"role": "assistant", "content": ""})
+
+        try:
+            # Create a chat completion with streaming enabled using the client
+            completion = client.chat.completions.create(
+                model="llama-3.2-3B-instruct",  # Adjust the model name if needed
+                messages=history[:-1],  # Exclude the empty assistant placeholder just appended
+                stream=True
+            )
+
+            # Iterate over the streamed response
+            for chunk in completion:
+                # Access the delta content from the chunk
+                delta = chunk.choices[0].delta
+                content = getattr(delta, 'content', '')
+                if content:
+                    # Update the assistant's message with new content
+                    history[-1]['content'] += content
+                    yield history
+        except Exception as e:
+            # Handle exceptions and display an error message
+            history[-1]['content'] += f"\n[Error]: {str(e)}"
+            yield history
+
+    # Set up the Gradio interface components
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, chatbot, chatbot
+    )
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+if __name__ == "__main__":
+    demo.launch()
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..451eced
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+docker buildx build --builder mybuilder --platform linux/amd64 --tag 637423653021.dkr.ecr.us-east-2.amazonaws.com/gradio-apps:test --load .
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..518e73f
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+gradio==5.4.0
+openai==1.52.2
+uv==0.4.28
\ No newline at end of file