tomyvo's picture
Update app.py
b5e7f9f verified
import pandas as pd
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from sentence_transformers import CrossEncoder
import gradio as gr
import torch
# Paths
CHROMA_DIR = "chroma_movies_bge" # Local Chroma folder
CSV_PATH = "FINALE_NEW_WITH_IMAGES.csv" # CSV with poster URLs
PLACEHOLDER_IMAGE = "placeholder.png" # Used if a poster is missing
# Load the CSV
movies = pd.read_csv(CSV_PATH, encoding="utf-8")
# Expose the row index as a "doc_id" column so rows can be selected by doc_id.
# NOTE(review): presumably this has to line up with the doc_id stored in the
# Chroma metadata when the index was built -- confirm against the indexing script.
movies["doc_id"] = movies.index
# CUDA check: run the models on the GPU when torch reports one, else on the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the embeddings
# NOTE(review): presumably this must be the same embedding model that was used
# to build the index in CHROMA_DIR -- confirm.
embeddings = HuggingFaceEmbeddings(
model_name="BAAI/bge-m3",
model_kwargs = {"device": device},
encode_kwargs={"normalize_embeddings": True}
)
# Load the Chroma index from its persisted directory
db_movies = Chroma(
persist_directory=CHROMA_DIR,
embedding_function=embeddings
)
# Load the cross-encoder used to rescore (query, document) pairs
reranker = CrossEncoder("cross-encoder/stsb-roberta-large", device=device)
# Semantic recommendation function
def retrieve_semantic_recommendations(query: str, top_k: int = 10):
    """Return poster/caption pairs for the movies that best match ``query``.

    Retrieves 50 candidates from the ``db_movies`` vector index, rescores
    every (query, document text) pair with ``reranker``, keeps the
    best-scoring hit per ``doc_id`` and returns the highest-scoring movies.

    Args:
        query: Free-text description of the desired movie.
        top_k: Maximum number of movies to return.

    Returns:
        A list of ``(poster, caption)`` tuples in descending score order.
        ``poster`` is the row's ``poster_url`` value, or ``PLACEHOLDER_IMAGE``
        when that value is missing or blank; ``caption`` is the title and
        description separated by a blank line. The list is empty when the
        query is blank, ``top_k`` is not positive, or nothing is retrieved.

    Raises:
        KeyError: If a retrieved document has no ``doc_id`` metadata, or the
            movie table lacks a ``title`` or ``description`` column.
    """
    # A blank query has nothing to match, and a non-positive top_k asks for no
    # results (a negative value would otherwise slice from the end of the list).
    if not query or not query.strip() or top_k <= 0:
        return []

    candidates = db_movies.similarity_search(query, k=50)
    if not candidates:
        # Nothing was retrieved, so there is nothing to rerank.
        return []

    pairs = [(query, doc.page_content) for doc in candidates]
    scores = reranker.predict(pairs, batch_size=8)

    # Several retrieved documents can share a doc_id; keep only the
    # best-scoring one so each movie appears at most once.
    best_by_id = {}
    for doc, score in zip(candidates, scores):
        doc_id = doc.metadata["doc_id"]
        if doc_id not in best_by_id or score > best_by_id[doc_id]["score"]:
            best_by_id[doc_id] = {"score": score, "rec": doc}

    ranked = sorted(best_by_id.values(), key=lambda hit: hit["score"], reverse=True)

    gallery = []
    for hit in ranked:
        if len(gallery) >= top_k:
            break
        doc_id = hit["rec"].metadata["doc_id"]
        # Poster and texts come from the CSV row with the same doc_id.
        matches = movies[movies["doc_id"] == doc_id]
        if matches.empty:
            # The index knows a doc_id the CSV does not: skip it rather than
            # fail the whole request, and let the next-best movie fill the slot.
            continue
        movie_row = matches.iloc[0]
        cover_url = movie_row.get("poster_url", PLACEHOLDER_IMAGE)
        # The default above only covers a missing column. An empty cell is
        # read as a non-string missing value, which is not a usable image.
        if not isinstance(cover_url, str) or not cover_url.strip():
            cover_url = PLACEHOLDER_IMAGE
        label = f"{movie_row['title']}\n\n{movie_row['description']}"
        gallery.append((cover_url, label))
    return gallery
# Gradio app
# UI layout: the query box and the search button share one row; the results
# gallery sits below them.
with gr.Blocks(theme=gr.themes.Glass()) as demo:
    gr.Markdown("# 🎬 Semantic Movie Recommender (Online Posters)")
    with gr.Row():
        user_query = gr.Textbox(
            label="Describe your ideal movie",
            placeholder="e.g., A sci-fi movie about time travel",
        )
        submit_button = gr.Button("🔍 Find recommendations")
    gr.Markdown("## 🍿 Recommended Movies")
    output = gr.Gallery(label="Movies", columns=3, rows=3, show_label=True)
    # Only the query text is wired in, so each click uses the function's
    # default top_k.
    submit_button.click(
        fn=retrieve_semantic_recommendations,
        inputs=user_query,
        outputs=output,
    )

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()