"""Gradio chat front-end for an adjective-embedding book recommender.

Each chat message goes through ``main_pipeline``:

1. ``process_user_input`` - send the message to the LLM and split the reply
   into the three adjective columns of a one-row user frame.
2. ``embedd_df_user``     - encode those adjectives with the sentence encoder.
3. ``similarity``         - score every book by the mean cosine similarity
   between the user's and the book's adjective vectors.
4. ``filter``             - ask the LLM, for each top candidate, whether it
   should be recommended.
5. ``output_content``     - ask the LLM to write the final reply.

The four module-level resources (``llm``, ``model``, ``embedd_bk``,
``df_bk``) default to ``None`` and must be assigned before the first message
is processed; see the commented-out loader below for one way to do that.
"""

import os
import re
from typing import Optional

import gradio as gr
import llama_cpp
import numpy as np
import pandas as pd
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Example loader, kept for reference (machine-specific paths):
#
# model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2',device='cpu')
#
# llm = LlamaCpp(
#     model_path=r"C:\Users\Cora\.cache\lm-studio\models\YC-Chen\Breeze-7B-Instruct-v1_0-GGUF\breeze-7b-instruct-v1_0-q4_k_m.gguf",
#     n_gpu_layers=100,
#     n_batch=512,
#     n_ctx=3000,
#     f16_kv=True,
#     callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
#     verbose=False,
# )
# embedd_bk=pd.read_pickle(r"C:\Users\Cora\推薦系統實作\bk_description1_角色形容詞_677.pkl")
# df_bk=pd.read_excel(r"C:\Users\Cora\推薦系統實作\bk_description1_角色形容詞.xlsx")

llm = None        # LangChain LLM used by invoke_with_temperature
model = None      # sentence encoder used by embedd_df_user
embedd_bk = None  # per-book embedding table consumed by similarity
df_bk = None      # per-book metadata table consumed by similarity

USER_COLUMNS = ["輸入內容", "形容詞1", "形容詞2", "形容詞3", "角色1", "角色2", "角色3"]
ADJECTIVE_COLUMNS = ["形容詞1", "形容詞2", "形容詞3"]
BOOK_COLUMNS = [
    "書名", "內容簡介", "URL",
    "形容詞1", "形容詞2", "形容詞3",
    "角色1", "角色2", "角色3",
]

USER_EXCEL_PATH = "user_gradio系統.xlsx"
# NOTE(review): absolute, machine-specific path; writing fails anywhere the
# directory does not exist.
USER_EMBEDDING_PATH = r"C:\Users\Cora\推薦系統實作\user_gradio系統.pkl"

# Positional layout the similarity step relies on.
USER_VECTOR_COLUMNS = slice(1, 4)  # adjective vectors in the user embedding frame
BOOK_VECTOR_COLUMNS = slice(3, 6)  # presumably the adjective vectors in embedd_bk - confirm
MAX_CANDIDATES = 20                # how many top-ranked books the LLM filter judges

# Reply returned when the LLM filter rejects every candidate.
NO_RECOMMENDATION_MESSAGE = "抱歉,目前找不到適合推薦的書籍。"

# Separators accepted between adjectives in the LLM reply.
_ADJECTIVE_SEPARATORS = re.compile(r"[,、,]")

# NOTE(review): every prompt body below is the bare "[INST][/INST]" wrapper
# found in the source; the actual instructions still have to be filled in.
USER_MENTAL_STATE_PROMPT = PromptTemplate(
    input_variables=["input"],
    template="""[INST][/INST]""",
)
# NOTE(review): defined in the original but never invoked, so the 角色 columns
# of the user frame are never populated.
USER_CHARACTER_PROMPT = PromptTemplate(
    input_variables=["input"],
    template="""[INST][/INST]""",
)
FILTER_PROMPT = PromptTemplate(
    input_variables=[
        "mental_issue", "user_identity", "book", "book_reader", "book_description",
    ],
    template="""[INST][/INST]""",
)
RECOMMEND_PROMPT = PromptTemplate(
    input_variables=["title", "URL", "summary"],
    template="""[INST][/INST]""",
)


def _require(resource, name):
    """Return *resource*, raising a clear error if it was never loaded."""
    if resource is None:
        raise RuntimeError(
            f"`{name}` is not loaded; assign it at module level before use."
        )
    return resource


def invoke_with_temperature(prompt, temperature=0.4):
    """Run *prompt* through the module-level ``llm`` and return its reply.

    Raises:
        RuntimeError: if ``llm`` has not been assigned.
    """
    return _require(llm, "llm").invoke(prompt, temperature=temperature)


def process_user_input(message: str) -> pd.DataFrame:
    """Build the one-row user frame for *message*.

    The LLM reply is split on ``,``/``、``/``,``; the pieces are stored in the
    three adjective columns only when there are exactly three of them,
    otherwise those columns stay empty. The frame is also written to
    ``USER_EXCEL_PATH``.
    """
    df_user = pd.DataFrame(columns=USER_COLUMNS)

    prompt_value = USER_MENTAL_STATE_PROMPT.invoke({"input": message})
    reply = invoke_with_temperature(prompt_value)
    adjectives = [part.strip() for part in _ADJECTIVE_SEPARATORS.split(reply)]

    row = len(df_user)
    df_user.loc[row, "輸入內容"] = message
    # Only accept the reply when it fills the adjective columns exactly.
    if len(adjectives) == len(ADJECTIVE_COLUMNS):
        for column, adjective in zip(ADJECTIVE_COLUMNS, adjectives):
            df_user.loc[row, column] = adjective

    df_user.to_excel(USER_EXCEL_PATH)
    return df_user


def embedd_df_user(df_user: pd.DataFrame) -> pd.DataFrame:
    """Encode the adjectives of the last row of *df_user*.

    Returns a frame with the original ``輸入內容`` column plus one object
    column per adjective holding the value returned by ``model.encode``.
    The frame is also pickled to ``USER_EMBEDDING_PATH``.

    Raises:
        RuntimeError: if ``model`` has not been assigned.
    """
    encoder = _require(model, "model")

    # Start from the input text and add empty (object) cells for the vectors.
    embedd_user = df_user[["輸入內容"]].assign(**dict.fromkeys(ADJECTIVE_COLUMNS))

    last = len(df_user) - 1
    for column in ADJECTIVE_COLUMNS:
        embedd_user.at[last, column] = encoder.encode(df_user.at[last, column])

    embedd_user.to_pickle(USER_EMBEDDING_PATH)
    return embedd_user


def top_n_books_by_average(df: pd.DataFrame, n: int = 3):
    """Rank *df* by its ``average`` column, highest first.

    Returns:
        ``(top_books, sorted_df)`` - the ``書名`` values of the first *n*
        rows, and the fully sorted frame.
    """
    sorted_df = df.sort_values(by="average", ascending=False)
    top_books = sorted_df.head(n)["書名"].tolist()
    return top_books, sorted_df


def similarity(embedd_user: pd.DataFrame, embedd_bk: pd.DataFrame,
               df_bk: pd.DataFrame) -> pd.DataFrame:
    """Score every book against the last row of *embedd_user*.

    For each row ``k`` of *embedd_bk* the score is the mean of the pairwise
    cosine similarities between the user's three vectors (columns 1-3 of
    *embedd_user*) and the book's three vectors (columns 3-5 of *embedd_bk*).
    The score is stored under label ``k`` of a copy of the book metadata, so
    *df_bk* is expected to share *embedd_bk*'s row order with a default index.

    Returns:
        The metadata columns plus ``average``, sorted by ``average``
        descending.
    """
    df_similarity = df_bk[BOOK_COLUMNS].copy()
    df_similarity["average"] = np.nan

    user_row = len(embedd_user) - 1
    user_vectors = np.vstack(list(embedd_user.iloc[user_row, USER_VECTOR_COLUMNS]))

    book_rows = embedd_bk.iloc[:, BOOK_VECTOR_COLUMNS].itertuples(index=False, name=None)
    for k, book_vectors in enumerate(book_rows):
        # One call yields the full 3x3 user-vs-book similarity matrix.
        scores = cosine_similarity(user_vectors, np.vstack(book_vectors))
        df_similarity.loc[k, "average"] = scores.mean()

    _, sorted_df = top_n_books_by_average(df_similarity)
    return sorted_df


# The name shadows the builtin ``filter``; it is kept so existing callers
# keep working.
def filter(sorted_df: pd.DataFrame, df_user: Optional[pd.DataFrame] = None) -> pd.DataFrame:
    """Keep the top candidates the LLM answers ``是`` for.

    The first ``MAX_CANDIDATES`` rows of *sorted_df* are judged one by one
    against the last row of *df_user*; the stripped LLM reply is stored in a
    new ``推薦`` column and only rows whose reply equals ``是`` are returned.

    Args:
        sorted_df: ranked books as returned by ``similarity``.
        df_user: user frame as returned by ``process_user_input``. Optional
            only to keep the old one-argument signature importable; it is
            required as soon as there is at least one candidate.

    Raises:
        ValueError: if there are candidates but no user row to compare with.
    """
    df_filter = sorted_df.iloc[:MAX_CANDIDATES, :].reset_index(drop=True)
    if df_filter.empty:
        return df_filter.assign(推薦=None)

    if df_user is None or df_user.empty:
        raise ValueError(
            "filter() needs the user frame from process_user_input() as `df_user`."
        )

    user = df_user.iloc[-1]
    user_identity = user["角色1"]
    mental_issue = "、".join(user[column] for column in ADJECTIVE_COLUMNS)

    verdicts = []
    for _, book in df_filter.iterrows():
        book_reader = book["角色1"]
        book_description = "、".join(book[column] for column in ADJECTIVE_COLUMNS)
        print(book_reader)
        print(user_identity)
        prompt_value = FILTER_PROMPT.invoke({
            "mental_issue": mental_issue,
            "user_identity": user_identity,
            "book": book["書名"],
            "book_description": book_description,
            "book_reader": book_reader,
        })
        verdicts.append(invoke_with_temperature(prompt_value))

    df_filter["推薦"] = verdicts
    return df_filter[df_filter["推薦"].str.strip() == "是"]


def output_content(df_recommend: pd.DataFrame) -> str:
    """Return the LLM-written reply for the best recommended book.

    The first row of *df_recommend* is used (by position, because the frame
    keeps the labels it had before filtering). When nothing was recommended,
    ``NO_RECOMMENDATION_MESSAGE`` is returned instead of calling the LLM.
    """
    if df_recommend.empty:
        return NO_RECOMMENDATION_MESSAGE

    best = df_recommend.iloc[0]
    prompt_value = RECOMMEND_PROMPT.invoke({
        "title": best["書名"],
        "URL": best.get("URL"),
        # presumably 內容簡介 is the summary the prompt was meant to receive - confirm
        "summary": best.get("內容簡介"),
    })
    return invoke_with_temperature(prompt_value, temperature=0.4)


def main_pipeline(message, history):
    """Chat handler: turn one user *message* into a recommendation reply.

    *history* is accepted because ``gr.ChatInterface`` passes it; it is not
    used.

    Raises:
        RuntimeError: if ``embedd_bk`` or ``df_bk`` has not been assigned.
    """
    book_embeddings = _require(embedd_bk, "embedd_bk")
    book_table = _require(df_bk, "df_bk")

    df_user = process_user_input(message)
    embedd_user = embedd_df_user(df_user)
    sorted_df = similarity(embedd_user, book_embeddings, book_table)
    df_recommend = filter(sorted_df, df_user)
    return output_content(df_recommend)


demo = gr.ChatInterface(main_pipeline)

if __name__ == "__main__":
    demo.launch()