diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d2e8eaa
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,23 @@
+# Single-stage image: install dependencies and run the app
+FROM python:3.10-bookworm
+
+# Set the working directory
+WORKDIR /usr/src/app
+
+RUN pip install uv==0.4.28
+# Copy the requirements file first so the dependency layer can be cached
+COPY requirements.txt .
+
+# Install the dependencies with uv into the image's Python interpreter
+RUN export PYTHON=$(which python) && \
+    uv pip install -r ./requirements.txt --python $PYTHON
+
+# Copy the application code
+COPY . .
+
+# Expose the app port and bind Gradio to all interfaces
+EXPOSE 7860
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+
+# Run the application
+CMD ["python", "app.py"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 739036e..9daeafb 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1 @@
-# simple-chatbot
-
-A (very) simple, text only chatbot using Society AI inference endpoint
\ No newline at end of file
+test
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..19d272b
--- /dev/null
+++ b/app.py
@@ -0,0 +1,59 @@
+import gradio as gr
+from openai import OpenAI
+
+# Initialize the OpenAI client
+client = OpenAI(
+    api_key="EMPTY",
+    base_url='https://llama-3-2-3b.societyai.com/openai/v1',
+)
+
+# If your endpoint requires an API key, uncomment and set it here
+# client.api_key = 'your-api-key'
+
+# Optionally, disable SSL verification if necessary (not recommended for production)
+# client.verify_ssl_certs = False
+
+with gr.Blocks(css="footer {visibility: hidden}") as demo:
+    chatbot = gr.Chatbot(type="messages")
+    msg = gr.Textbox()
+    clear = gr.Button("Clear")
+
+    def user(user_message, history: list):
+        """Appends the user message to the conversation history."""
+        return "", history + [{"role": "user", "content": user_message}]
+
+    def bot(history: list):
+        """Sends the conversation history to the vLLM API and streams the assistant's response."""
+        # Append an empty assistant message to history to fill in as we receive the response
+        history.append({"role": "assistant", "content": ""})
+
+        try:
+            # Create a chat completion with streaming enabled using the client
+            completion = client.chat.completions.create(
+                model="llama-3.2-3B-instruct",  # Adjust the model name if needed
+                messages=history[:-1],  # Exclude the empty assistant placeholder just appended
+                stream=True
+            )
+
+            # Iterate over the streamed response
+            for chunk in completion:
+                # Access the delta content from the chunk
+                delta = chunk.choices[0].delta
+                content = getattr(delta, 'content', '')
+                if content:
+                    # Update the assistant's message with new content
+                    history[-1]['content'] += content
+                    yield history
+        except Exception as e:
+            # Handle exceptions and display an error message
+            history[-1]['content'] += f"\n[Error]: {str(e)}"
+            yield history
+
+    # Set up the Gradio interface components
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, chatbot, chatbot
+    )
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+if __name__ == "__main__":
+    demo.launch()
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..451eced
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+docker buildx build --builder mybuilder --platform linux/amd64 --tag 637423653021.dkr.ecr.us-east-2.amazonaws.com/gradio-apps:test --load .
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..518e73f
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+gradio==5.4.0
+openai==1.52.2
+uv==0.4.28
\ No newline at end of file