Spaces:

yosuke123456
/

chatappdemo01

Runtime error

App Files Files Community

chatappdemo01 / app.py

yosuke123456

Update app.py

cf7e2b9 verified 11 months ago

raw

history blame contribute delete

5.6 kB

	# https://qiita.com/nekoniii3/items/5acf764af65212d9f04f

	import gradio as gr

	import os

	from langchain_community.document_loaders import PyMuPDFLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_openai import ChatOpenAI
	from langchain_community.vectorstores import Chroma
	from langchain.chains import RetrievalQA
	# from langchain_openai import OpenAIEmbeddings
	from langchain_community.embeddings import HuggingFaceEmbeddings


	# Ask the Hugging Face tokenizers library not to parallelise.
	os.environ["TOKENIZERS_PARALLELISM"] = "false"
	# OPENAI_API_KEY is expected to come from the process environment (for
	# example a Space secret); it must never be written into this file.
	# NOTE(review): an API key used to be embedded at this spot inside a
	# commented-out line. Treat that key as leaked and revoke it.

	# PDFs that make up the knowledge base. The names are compared against the
	# "source" metadata of retrieved chunks later in the script, so they must
	# stay exactly as the files are named on disk.
	file_name1 = 'ALV2_ALV3DTU操作マニュアルDTU-V3SET01.pdf'  # DTU operation manual
	file_name2 = 'ALV3PCサーバ_ソフトウェア操作マニュアル_画像ファイル名付.pdf'  # PC server software manual
	file_name3 = '美和ロック総合カタログ第31版_前半.pdf'  # general catalogue, first half
	file_name4 = '美和ロック総合カタログ第31版_後半.pdf'  # general catalogue, second half

	# One loader per PDF, created in file order.
	loader1, loader2, loader3, loader4 = (
	    PyMuPDFLoader(pdf_path)
	    for pdf_path in (file_name1, file_name2, file_name3, file_name4)
	)

	# Load every PDF into LangChain documents.
	documents1, documents2, documents3, documents4 = (
	    pdf_loader.load()
	    for pdf_loader in (loader1, loader2, loader3, loader4)
	)

	# Cut the documents into chunks of up to 1000 characters with no overlap.
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
	texts1, texts2, texts3, texts4 = (
	    text_splitter.split_documents(loaded)
	    for loaded in (documents1, documents2, documents3, documents4)
	)
	# Single corpus that feeds the vector store.
	texts = texts1 + texts2 + texts3 + texts4

	# Embeddings come from a Hugging Face sentence model; the earlier choice
	# was OpenAIEmbeddings(model="text-embedding-ada-002").
	embeddings = HuggingFaceEmbeddings(
	    model_name="oshizo/sbert-jsnli-luke-japanese-base-lite"
	)
	vectordb = Chroma.from_documents(texts, embeddings)
	llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.05)

	# Retrieval QA chain; source documents are returned alongside the answer so
	# that the chat handler can link back to the pages they came from.
	qa = RetrievalQA.from_chain_type(
	    llm=llm,
	    chain_type="stuff",
	    retriever=vectordb.as_retriever(),
	    return_source_documents=True,
	)

	import shutil
	def save_image_filepath(filepath: str):
	    """Print ``filepath`` and copy that file to ``./filepath<ext>``.

	    The copy is placed in the current working directory under the fixed
	    base name ``filepath`` and keeps the extension of the source file.
	    Nothing is returned.
	    """
	    print(filepath)
	    # Save the image under a fixed name, preserving only its extension.
	    extension = os.path.splitext(filepath)[1]
	    destination = './filepath{}'.format(extension)
	    shutil.copy(filepath, destination)

	import boto3
	# No credentials are passed explicitly: boto3 resolves them through its
	# default provider chain (the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
	# environment variables, the shared config files, or an attached role), so
	# no secret has to live in the source tree. On a hosted Space, define the
	# two variables as secrets.
	# NOTE(review): an access key pair used to be hard-coded at this spot. It
	# has been published with the file and must be deactivated in IAM.
	s3 = boto3.client('s3', region_name='ap-northeast-1')


	# 画像のURL出力機能
	# Region of each bucket already looked up, so that building many links for
	# one chat reply does not repeat the same network round trip.
	_BUCKET_REGION_CACHE = {}


	def get_public_url(bucket, target_object_path):
	    """
	    Return the path-style HTTPS URL of an object stored in S3.

	    The URL is not signed, so it is only reachable when the object itself
	    is publicly readable. The regional endpoint is written in the
	    ``s3.<region>`` form and the object key is percent-encoded.

	    Parameters
	    ----------
	    bucket: string
	        Name of the S3 bucket.
	    target_object_path: string
	        Key (path) of the object inside the bucket.

	    Returns
	    ----------
	    url: string
	        URL of the object on S3.
	    """
	    from urllib.parse import quote

	    region = _BUCKET_REGION_CACHE.get(bucket)
	    if region is None:
	        bucket_location = s3.get_bucket_location(Bucket=bucket)
	        # S3 reports a null LocationConstraint for us-east-1 and the legacy
	        # value "EU" for eu-west-1; normalise both to real region names.
	        region = bucket_location.get('LocationConstraint') or 'us-east-1'
	        if region == 'EU':
	            region = 'eu-west-1'
	        _BUCKET_REGION_CACHE[bucket] = region

	    # Keys may contain spaces or non-ASCII characters; "/" stays literal so
	    # that nested keys keep their structure.
	    return "https://s3.{0}.amazonaws.com/{1}/{2}".format(
	        region,
	        bucket,
	        quote(target_object_path, safe='/'))

	import fitz
	# Open both manuals with PyMuPDF. doc2 is used by the chat handler to read
	# page annotations; doc1 is not referenced elsewhere in the visible code.
	doc1, doc2 = (fitz.open(pdf_path) for pdf_path in (file_name1, file_name2))

	import math

	import html


	# Online catalogue viewer; only the ``pg`` value changes from link to link.
	_CATALOG_URL = (
	    "https://dcs.mediapress-net.com/iportal/cv.do"
	    "?c=20958580000&pg={}&v=MIW10001&d=LINK_MIW"
	)


	def _link(url, label):
	    """Return ``label`` as an HTML anchor to ``url``, prefixed by a space."""
	    # The href is always quoted and both parts are escaped, so that "&" in
	    # query strings or markup characters in a label cannot break the tag.
	    return ' <a href="{}">{}</a>'.format(
	        html.escape(url, quote=True), html.escape(label, quote=False)
	    )


	def _links_for_source(source, page_index):
	    """Build the reference links for one retrieved chunk.

	    ``source`` is the chunk's ``metadata["source"]`` and ``page_index`` its
	    ``metadata["page"]`` (presumed 0-based, as the displayed number is
	    ``page_index + 1`` -- confirm against the loader). A list of HTML
	    anchors is returned; it is empty when ``source`` is none of the known
	    PDFs.
	    """
	    page = page_index + 1
	    page_num = str(page).zfill(3)

	    if source == file_name1:
	        # Pre-rendered image of the manual page.
	        key = 'page' + page_num + '_raster.png'
	        return [_link(get_public_url('page.dtu.manual', key), page_num)]

	    if source == file_name2:
	        # Pre-rendered image of the manual page.
	        key = 'page' + page_num + '_raster.png'
	        links = [_link(get_public_url('page.server.manual', key), page_num)]
	        # Images attached to the page: each note annotation's text is used
	        # as the object key. The document is indexed with the 0-based
	        # page index, not the 1-based display number.
	        for annotation in doc2[page_index].annots():
	            key = annotation.info.get('content', '')
	            if key:  # an annotation without text names no image
	                links.append(
	                    _link(get_public_url('image.server.manual', key), key)
	                )
	        return links

	    if source == file_name3:
	        # Two PDF pages map onto one viewer page number.
	        viewer_page = str(1 + page // 2)
	        return [_link(_CATALOG_URL.format(viewer_page), viewer_page)]

	    if source == file_name4:
	        # Same mapping, shifted by 486 -- presumably the number of pages in
	        # the first-half PDF; verify if the catalogue files change.
	        viewer_page = str(1 + (486 + page) // 2)
	        return [_link(_CATALOG_URL.format(viewer_page), viewer_page)]

	    # Unknown source: add no link. Exiting here would stop the whole app
	    # for every user.
	    return []


	with gr.Blocks() as demo:
	    chatbot = gr.Chatbot()

	    msg = gr.Textbox()

	    def user(user_message, history):
	        """Answer ``user_message`` and append the exchange to ``history``.

	        The reply is the chain's answer followed by one link per retrieved
	        source chunk. Returns an empty string (to clear the textbox) and
	        the extended history.
	        """
	        answer = qa(user_message)
	        parts = [answer['result']]
	        for sd in answer["source_documents"]:
	            parts.extend(
	                _links_for_source(
	                    str(sd.metadata["source"]), sd.metadata["page"]
	                )
	            )
	        return "", (history or []) + [[user_message, "".join(parts)]]

	    def bot(history):
	        """Yield the history unchanged as the follow-up step of the submit event."""
	        yield history

	    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=True).then(
	        bot, chatbot, chatbot
	    )

	demo.queue()
	demo.launch(share=True)