Spaces:

JaphetHernandez
/

Prueba_1

Running

App Files Files Community

JaphetHernandez committed on Oct 28

Commit

235f923

•

1 Parent(s): 2a1126f

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -0

app.py CHANGED Viewed

@@ -1,3 +1,129 @@
 import pandas as pd
 import streamlit as st
 from langchain.llms import HuggingFacePipeline
@@ -81,3 +207,4 @@ if uploaded_file is not None:
                     st.error(f"Error durante la generación: {e}")
     else:
         st.error("La columna 'job_title' no se encuentra en el archivo CSV.")

+import pandas as pd
+import streamlit as st
+from langchain_huggingface import HuggingFacePipeline  # Nueva importación
+from langchain_core.prompts import PromptTemplate
+from langchain.chains import LLMChain
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from huggingface_hub import login
+import torch
+import json
+from datetime import datetime
+# Hugging Face Hub token, read from the Streamlit secrets entry "FIREWORKS".
+huggingface_token = st.secrets["FIREWORKS"]
+# Fireworks function-calling model hosted on the Hugging Face Hub.
+model_id = "fireworks-ai/firefunction-v2"
+
+
+@st.cache_resource
+def load_fireworks_resources():
+    """Log in to the Hub and build the tokenizer, model and pipelines once.
+
+    Streamlit re-executes this script from top to bottom on every widget
+    interaction; ``st.cache_resource`` keeps the returned objects alive in the
+    server process so the model weights are not reloaded on each rerun.
+
+    Returns:
+        A ``(tokenizer, model, text_pipeline, langchain_llm)`` tuple.
+    """
+    login(huggingface_token)
+    loaded_tokenizer = AutoTokenizer.from_pretrained(model_id)
+    loaded_model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        device_map="auto",
+        torch_dtype=torch.float16,
+    )
+    # Text-generation pipeline built on the already-loaded model.
+    text_pipeline = pipeline(
+        "text-generation",
+        model=loaded_model,
+        tokenizer=loaded_tokenizer,
+        max_new_tokens=128,
+    )
+    # LangChain adapter around the transformers pipeline.
+    langchain_llm = HuggingFacePipeline(pipeline=text_pipeline)
+    return loaded_tokenizer, loaded_model, text_pipeline, langchain_llm
+
+
+tokenizer, model, fireworks_pipeline, llm_pipeline = load_fireworks_resources()
+
+# Function specification handed to the model through the chat template.
+function_spec = [
+    {
+        "name": "calculate_cosine_similarity",
+        "description": "Calculate the cosine similarity between two strings.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "The main query string for similarity calculation"
+                },
+                "job_title": {
+                    "type": "string",
+                    "description": "The job title to compare with the query"
+                }
+            },
+            "required": ["query", "job_title"]
+        }
+    }
+]
+functions = json.dumps(function_spec, indent=4)
+
+# Streamlit interface
+st.title("Cosine Similarity Calculation with Fireworks, LangChain, and Llama 3.1")
+# CSV upload; the file must contain a 'job_title' column.
+uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])
+if uploaded_file is not None:
+    # Load the CSV into a DataFrame.
+    df = pd.read_csv(uploaded_file)
+    if 'job_title' in df.columns:
+        query = 'aspiring human resources specialist'
+        job_titles = df['job_title'].tolist()
+        # Prompt that carries both the query and the uploaded job titles.
+        prompt_template = PromptTemplate(
+            template=(
+                "Calculate the cosine similarity between the query: '{query}' "
+                "and the list of job titles: {job_titles}. "
+                "Return the results as 'Job Title: [Job Title], Score: [Cosine Similarity Score]'."
+            ),
+            input_variables=["query", "job_titles"]
+        )
+        if st.button("Calcular Similitud de Coseno"):
+            with st.spinner("Calculando similitudes con Fireworks..."):
+                try:
+                    # The user turn is rendered from the prompt template so the
+                    # job titles from the CSV actually reach the model.
+                    user_prompt = prompt_template.format(query=query, job_titles=job_titles)
+                    messages = [
+                        {'role': 'system', 'content': 'You are a helpful assistant with access to functions. Use them if required.'},
+                        {'role': 'user', 'content': user_prompt}
+                    ]
+                    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+                    # The chat template receives the function spec and the
+                    # current timestamp as extra template variables.
+                    model_inputs = tokenizer.apply_chat_template(
+                        messages,
+                        functions=functions,
+                        datetime=now,
+                        return_tensors="pt"
+                    ).to(model.device)
+                    # Generate and decode the model response.
+                    generated_ids = model.generate(model_inputs, max_new_tokens=128)
+                    decoded = tokenizer.batch_decode(generated_ids)
+                    st.write("Respuesta del modelo:")
+                    st.write(decoded[0])
+                    # NOTE: placeholder scores for the demo. The model output
+                    # is not parsed; every row gets the same constant value.
+                    df['Score'] = [0.95] * len(df)
+                    # Show the updated DataFrame.
+                    st.write("DataFrame con los puntajes de similitud:")
+                    st.write(df)
+                except Exception as e:
+                    # Top-level UI boundary: surface any failure to the user.
+                    st.error(f"Error durante la generación: {e}")
+    else:
+        st.error("La columna 'job_title' no se encuentra en el archivo CSV.")
+'''
 import pandas as pd
 import streamlit as st
 from langchain.llms import HuggingFacePipeline
                     st.error(f"Error durante la generación: {e}")
     else:
         st.error("La columna 'job_title' no se encuentra en el archivo CSV.")
+'''