import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

st.set_page_config(page_title="TinyLlama Chatbot")

@st.cache_resource
def load_model():
    # Load the TinyLlama chat model and tokenizer once and reuse them across Streamlit reruns.
    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    model.eval()
    return tokenizer, model


st.title("Turbo Chatbot")

user_input = st.text_input("Ask a question")

if user_input:
    with st.spinner("Generating response..."):
        tokenizer, model = load_model()

        # Build the prompt in TinyLlama's chat format: system, user, then an open assistant turn.
        prompt = f"""<|system|>
You are a helpful AI assistant.
<|user|>
{user_input}
<|assistant|>
"""

        input_ids = tokenizer(prompt, return_tensors="pt").input_ids

        # Sample a completion; no_grad avoids building the autograd graph during inference.
        with torch.no_grad():
            output = model.generate(
                input_ids,
                max_new_tokens=120,
                temperature=0.6,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Keep only the text after the assistant marker, dropping the echoed prompt.
        decoded = tokenizer.decode(output[0], skip_special_tokens=True)
        response = decoded.split("<|assistant|>")[-1].strip()

    st.subheader("Response")
    st.write(response)