Spaces:

JaphetHernandez
/

Prueba_1

Sleeping

App Files Files Community

Prueba_1 / app.py

JaphetHernandez

Update app.py

7c93cc3 verified about 1 year ago

raw

history blame contribute delete

9.34 kB

	import pandas as pd
	import streamlit as st
	from langchain_huggingface import HuggingFacePipeline
	from langchain_core.prompts import PromptTemplate
	from langchain.chains import LLMChain
	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
	from huggingface_hub import login
	import torch
	import json
	import os
	from datetime import datetime

	# Set the CUDA allocator option intended to avoid memory fragmentation.
	os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"})

	# Log in to the Hugging Face Hub with the token stored under the
	# "FIREWORKS" Streamlit secret.
	huggingface_token = st.secrets["FIREWORKS"]
	login(token=huggingface_token)

	# int8 quantization (bitsandbytes) with fp32 CPU offload for the modules
	# that do not fit on the GPU. The previous `quantization_scheme="gptq"`
	# argument was dropped: it is not a BitsAndBytesConfig option.
	quant_config = BitsAndBytesConfig(
	    load_in_8bit=True,
	    llm_int8_enable_fp32_cpu_offload=True,
	)

	model_id = "fireworks-ai/firefunction-v2"


	@st.cache_resource
	def _load_generation_stack(model_name):
	    """Load tokenizer, model and generation pipelines once per server process.

	    Streamlit re-executes the whole script on every widget interaction, so
	    without caching the checkpoint would be reloaded on each button click.

	    Args:
	        model_name: Hugging Face Hub id of the causal LM to load.

	    Returns:
	        A tuple ``(tokenizer, model, text_generation_pipeline,
	        langchain_pipeline)``.
	    """
	    tok = AutoTokenizer.from_pretrained(model_name)
	    # Fall back to EOS as the padding token when the tokenizer defines none.
	    if tok.pad_token_id is None:
	        tok.pad_token_id = tok.eos_token_id
	    # Decoder-only models continue from the last position, so padded batches
	    # must be padded on the left for generation to start from real tokens.
	    tok.padding_side = "left"

	    lm = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        device_map="auto",  # let accelerate place layers across GPU and CPU
	        torch_dtype=torch.float16,
	        quantization_config=quant_config,
	    )

	    # max_new_tokens is kept small to limit memory use.
	    text_gen = pipeline(
	        "text-generation",
	        model=lm,
	        tokenizer=tok,
	        max_new_tokens=20,
	    )
	    # Wrap the transformers pipeline for use with LangChain.
	    return tok, lm, text_gen, HuggingFacePipeline(pipeline=text_gen)


	tokenizer, model, fireworks_pipeline, llm_pipeline = _load_generation_stack(model_id)

	# Streamlit page header.
	st.title("Cosine Similarity Calculation with Fireworks, LangChain, and Llama 3.1")

	# CSV upload widget; the file is expected to contain a 'job_title' column.
	uploaded_file = st.file_uploader(
	    label="Sube un archivo CSV con la columna 'job_title':",
	    type=["csv"],
	)

	def _generate_batch(texts):
	    """Generate a short continuation for every string in ``texts``.

	    The encoded inputs are moved to the device the model reports instead of
	    a hard-coded "cuda", and the attention mask returned by the tokenizer is
	    used unchanged (rebuilding it from ``input_ids != pad_token_id`` would
	    also mask genuine EOS tokens when the pad token is the EOS token).

	    Args:
	        texts: List of strings to feed to the model as one padded batch.

	    Returns:
	        The decoded output sequences, one string per input.
	    """
	    encoded = tokenizer(
	        texts,
	        return_tensors="pt",
	        padding=True,
	        truncation=True,
	    ).to(model.device)

	    with torch.cuda.amp.autocast():  # mixed precision
	        output_ids = model.generate(
	            **encoded,
	            max_new_tokens=20,  # kept small to limit memory use
	            num_beams=1,  # no beam search, for speed
	        )
	    # The tensors are local to this function and are released on return.
	    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)


	if uploaded_file is not None:
	    # Load the uploaded CSV into a DataFrame.
	    df = pd.read_csv(uploaded_file)
	    if 'job_title' not in df.columns:
	        st.error("La columna 'job_title' no se encuentra en el archivo CSV.")
	    else:
	        query = 'aspiring human resources specialist'
	        # Empty cells are read as NaN floats, which the tokenizer cannot
	        # encode; normalise every title to a string first.
	        job_titles = df['job_title'].fillna("").astype(str).tolist()

	        # Process the titles in small batches to limit memory use.
	        batch_size = 4
	        job_titles_batches = [
	            job_titles[start:start + batch_size]
	            for start in range(0, len(job_titles), batch_size)
	        ]

	        # Function-style prompt describing the similarity task.
	        prompt_template = PromptTemplate(
	            template=(
	                "Function: calculate_cosine_similarity\n"
	                "Description: Calculate the cosine similarity between the given query and job titles.\n"
	                "Parameters:\n"
	                " - query: The query string to compare.\n"
	                " - job_titles: A list of job titles to compare against.\n"
	                "Input:\n"
	                " query: '{query}'\n"
	                " job_titles: {job_titles}\n"
	                "Output:\n"
	                " Return the results as 'Job Title: [Job Title], Score: [Cosine Similarity Score]'."
	            ),
	            input_variables=["query", "job_titles"]
	        )

	        # NOTE(review): the chain is built but never invoked below; the
	        # generation loop feeds the raw job titles to the model directly.
	        llm_chain = LLMChain(
	            llm=llm_pipeline,
	            prompt=prompt_template
	        )

	        if st.button("Calcular Similitud de Coseno"):
	            with st.spinner("Calculando similitudes con Fireworks..."):
	                all_scores = []
	                try:
	                    for batch in job_titles_batches:
	                        # TODO: parse `decoded` into real scores; it is
	                        # currently computed but not used.
	                        decoded = _generate_batch(batch)
	                        # Placeholder score for demonstration purposes.
	                        all_scores.extend([0.95] * len(batch))

	                        # Return cached GPU memory after each batch.
	                        if torch.cuda.is_available():
	                            torch.cuda.empty_cache()

	                    # Attach the scores to the DataFrame and show it.
	                    df['Score'] = all_scores
	                    st.write("DataFrame con los puntajes de similitud:")
	                    st.write(df)
	                except Exception as e:
	                    # Top-level UI boundary: report the failure in the page.
	                    st.error(f"Error durante la generación: {e}")



	# NOTE: an earlier prototype of this app (meta-llama/Llama-3.2-1B driven
	# through LLMChain.run) used to be kept here inside a bare triple-quoted
	# string. A string expression that is not a docstring is picked up by
	# Streamlit's "magic" and written to the page, so the dead code was removed;
	# the previous version remains available in the repository history.