Spaces:
Runtime error
Runtime error
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[52]: | |
# !pip install -q pymilvus towhee gradio | |
# In[53]: | |
#!curl -L https://raw.githubusercontent.com/pankajkishore/Cognitive-Project/master/latest_ticket_data.csv -O
# In[1]: | |
import pandas as pd | |
# Read the ticket export; the cells below inspect it and derive id-keyed lookups.
df = pd.read_csv('latest_ticket_data.csv')
df.head()
# In[2]:
df.shape
# In[3]:
# Size of each description: the smaller of its character count and its
# space-separated piece count. Cells that are not strings yield None.
df['length'] = df['description'].apply(
    lambda text: None if not isinstance(text, str) else min(len(text), len(text.split(" ")))
)
df['length'].max()
# In[4]:
df['description'][14]
# In[5]:
df.shape
# In[6]:
# Re-index by ticket id once; both dictionaries below are keyed by that id.
_tickets_by_id = df.set_index('id')
id_category = _tickets_by_id['category'].to_dict()
# In[7]:
id_description = _tickets_by_id['description'].to_dict()
# In[8]:
id_description[12]
# In[9]:
id_category[10]
# In[11]: | |
from milvus import default_server | |
from pymilvus import connections, utility | |
# Start the Milvus `default_server` before any client connection is opened.
default_server.start()
# In[12]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
# In[24]:
# # Milvus parameters
_milvus_endpoint = {'host': '127.0.0.1', 'port': '19530'}
connections.connect(**_milvus_endpoint)
# In[25]:
# Notebook-style inspection of the port reported by the server object.
default_server.listen_port
# In[17]: | |
def create_milvus_collection(collection_name, dim):
    """Create a fresh, indexed Milvus collection for text embeddings.

    If a collection called ``collection_name`` already exists it is dropped
    first, so any vectors stored in it are discarded.

    Args:
        collection_name: Name of the collection to (re)create.
        dim: Dimension of the vectors stored in the ``embedding`` field.

    Returns:
        The newly created ``Collection`` with an IVF_FLAT index (L2 metric)
        built on the ``embedding`` field.
    """
    connections.connect(host='127.0.0.1', port='19530')
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    # The keyword was previously misspelled ``descrition``, so the text was
    # never applied as the field description.
    fields = [
        FieldSchema(
            name='id',
            dtype=DataType.VARCHAR,
            description='ids',
            max_length=500,
            is_primary=True,
            auto_id=False,
        ),
        FieldSchema(
            name='embedding',
            dtype=DataType.FLOAT_VECTOR,
            description='embedding vectors',
            dim=dim,
        ),
    ]
    schema = CollectionSchema(fields=fields, description='reverse text search')
    collection = Collection(name=collection_name, schema=schema)

    # create IVF_FLAT index for collection.
    index_params = {
        'metric_type': 'L2',
        'index_type': "IVF_FLAT",
        'params': {"nlist": 2048},
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    return collection
# In[18]: | |
# (Re)create the collection that the insert and search pipelines target.
# NOTE(review): 768 is presumably the DPR embedding width — confirm.
collection = create_milvus_collection(collection_name='latest_ticket_data', dim=768)
# In[19]:
collection.load()
# In[26]: | |
from towhee import pipe, ops | |
import numpy as np | |
from towhee.datacollection import DataCollection | |
# Ingestion pipeline, built one stage at a time:
#   description -> DPR embedding -> divide by its norm -> insert (id, vec) into Milvus.
# The 'category' input is accepted but not used by any stage.
insert_pipe = pipe.input('id', 'description', 'category')
insert_pipe = insert_pipe.map(
    'description',
    'vec',
    ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'),
)
insert_pipe = insert_pipe.map(
    'vec',
    'vec',
    lambda vec: vec / np.linalg.norm(vec, axis=0),
)
insert_pipe = insert_pipe.map(
    ('id', 'vec'),
    'insert_status',
    ops.ann_insert.milvus_client(
        host='127.0.0.1',
        port='19530',
        collection_name='latest_ticket_data',
    ),
)
insert_pipe = insert_pipe.output()
# In[ ]: | |
# File "/Users/www.abcom.in/Documents/milvus/.milvusenv/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 238, in forward | |
# embeddings += position_embeddings | |
# RuntimeError: The size of tensor a (1002) must match the size of tensor b (512) at non-singleton dimension 1 | |
# In[27]: | |
import csv | |
# Feed every CSV data row through the insert pipeline.
# newline='' lets the csv module handle line endings itself, which matters for
# quoted fields that contain embedded newlines.
# NOTE(review): an earlier run failed inside the BERT encoder with a
# 1002-vs-512 tensor size mismatch, which suggests over-long descriptions are
# not truncated — confirm before bulk loading.
with open('latest_ticket_data.csv', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    # Skip the header row; the default keeps an empty file from raising StopIteration.
    next(reader, None)
    for row in reader:
        # Each row must supply exactly the pipeline inputs: id, description, category.
        insert_pipe(*row)
# In[28]:
# Flush pending inserts so the entity count below reflects what was just written.
collection.flush()
collection.load()
# In[29]:
print('Total number of inserted data is {}.'.format(collection.num_entities))
# In[30]: | |
# Query pipeline, built one stage at a time:
#   description -> DPR embedding -> divide by its norm -> nearest Milvus hit -> category.
ans_pipe = pipe.input('description')
ans_pipe = ans_pipe.map(
    'description',
    'vec',
    ops.text_embedding.dpr(model_name="facebook/dpr-ctx_encoder-single-nq-base"),
)
ans_pipe = ans_pipe.map(
    'vec',
    'vec',
    lambda vec: vec / np.linalg.norm(vec, axis=0),
)
ans_pipe = ans_pipe.map(
    'vec',
    'res',
    ops.ann_search.milvus_client(
        host='127.0.0.1',
        port='19530',
        collection_name='latest_ticket_data',
        limit=1,
    ),
)
# The first element of each hit is used as the ticket id (cast to int) to look up its category.
ans_pipe = ans_pipe.map(
    'res',
    'category',
    lambda hits: [id_category[int(hit[0])] for hit in hits],
)
ans_pipe = ans_pipe.output('description', 'category')
# In[31]:
# Smoke-test the pipeline with one sample ticket text.
ans = ans_pipe('report hi please attached report user take appropriate actions order agent her computer')
# In[32]:
ans = DataCollection(ans)
ans.show()
# In[33]: | |
import towhee | |
def chat(message, history):
    """Answer one chat turn with the category of the closest stored ticket.

    Args:
        message: Ticket description typed by the user.
        history: List of ``(message, response)`` tuples from earlier turns,
            or a falsy value on the first turn.

    Returns:
        A ``(history, history)`` tuple: the updated conversation, once for
        the chatbot output and once for the "state" output.
    """
    history = history or []
    # Reuse the module-level ``ans_pipe`` (same stages, limit=1) instead of
    # rebuilding the pipeline and its DPR operator on every message.
    categories = ans_pipe(message).get()[1]
    # An empty hit list used to raise IndexError; answer with a fallback instead.
    response = categories[0] if categories else 'No matching category found.'
    history.append((message, response))
    return history, history
# In[34]: | |
import gradio | |
# Make sure the collection is loaded before the UI starts issuing searches.
collection.load()
# NOTE(review): `color_map` and `allow_screenshot` appear to be deprecated in
# newer Gradio releases — confirm against the installed version.
chatbot = gradio.Chatbot(color_map=("green", "gray"))
# Inputs/outputs pair the visible text/chatbot widgets with a hidden "state"
# slot that carries the conversation history between calls to `chat`.
interface = gradio.Interface(
    fn=chat,
    inputs=["text", "state"],
    outputs=[chatbot, "state"],
    allow_screenshot=False,
    allow_flagging="never",
)
interface.launch(inline=True, share=True)
# In[ ]: | |
# In[ ]: | |
# In[ ]: | |
# In[ ]: | |