ALVHB95 commited on
Commit
61daa45
1 Parent(s): d888a39
Files changed (6) hide show
  1. Dockerfile.txt +11 -0
  2. README.md +5 -4
  3. app.py +148 -0
  4. front_4.jpg +0 -0
  5. requirements.txt +11 -0
  6. theme.py +92 -0
Dockerfile.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for running the Gradio chatbot defined in app.py.
FROM python:3.10

WORKDIR /code

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt itself changes.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# Gradio reads these variables when launch() is called without explicit
# server_name/server_port; listen on every interface on port 7860.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860
EXPOSE 7860

# app.py builds the ChatInterface and calls launch() itself. There is no
# main.py exposing an ASGI `app`, so the previous `uvicorn main:app` command
# could not start.
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
  title: Test Alv 1
3
- emoji: 📊
4
- colorFrom: gray
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 4.19.2
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Test Alv 1
3
+ emoji: 💻
4
+ colorFrom: green
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.16.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ startup_duration_timeout: 2h
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ import theme
5
+
6
+ from huggingface_hub import from_pretrained_keras
7
+ from tensorflow.keras.applications import EfficientNetB0
8
+
9
+ import tensorflow as tf
10
+ from tensorflow import keras
11
+ from PIL import Image
12
+
13
+ theme = theme.Theme()
14
+
15
+ import os
16
+ import sys
17
+ sys.path.append('../..')
18
+
19
+ #langchain
20
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
21
+ from langchain.embeddings import HuggingFaceEmbeddings
22
+ from langchain.prompts import PromptTemplate
23
+ from langchain.chains import RetrievalQA
24
+ from langchain.prompts import ChatPromptTemplate
25
+ from langchain.schema import StrOutputParser
26
+ from langchain.schema.runnable import Runnable
27
+ from langchain.schema.runnable.config import RunnableConfig
28
+ from langchain.chains import (
29
+ LLMChain, ConversationalRetrievalChain)
30
+ from langchain.vectorstores import Chroma
31
+ from langchain.memory import ConversationBufferMemory
32
+ from langchain.chains import LLMChain
33
+ from langchain.prompts.prompt import PromptTemplate
34
+ from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate
35
+ from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder
36
+ from langchain.document_loaders import PyPDFDirectoryLoader
37
+ from langchain.output_parsers import PydanticOutputParser
38
+ from langchain_community.llms import HuggingFaceHub
39
+ from langchain_community.document_loaders import WebBaseLoader
40
+ from pydantic.v1 import BaseModel, Field
41
+ import shutil
42
+
43
+
44
# Title shown above the chat window; raw HTML so the colour can be set inline.
custom_title = "<span style='color: rgb(243, 239, 224);'>Green Greta</span>"


# Cell 2: ChatBot Model
# Download the single web page that serves as the chatbot's source material.
loader = WebBaseLoader(["https://www.epa.gov/recycle/frequent-questions-recycling"])
data = loader.load()

# Cut the page into 1024-character chunks that overlap by 150 characters,
# measuring length with the built-in len.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=150, length_function=len)
docs = text_splitter.split_documents(data)

# Embedding model used to index the chunks.
embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')

# Rebuild the Chroma store from scratch on every start: delete whatever is
# left in the persistence directory (ignoring errors such as it not existing),
# then index the freshly split chunks.
persist_directory = 'docs/chroma/'
shutil.rmtree(persist_directory, ignore_errors=True)
vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory=persist_directory)

# Retriever over the store: MMR search, two results per query.
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2})
71
+
72
# Structured reply requested from the model: the question that was asked and
# the generated answer, both declared as strings via Field() with no default.
# This class is handed to PydanticOutputParser to produce the format
# instructions that are injected into the prompt.
+ class FinalAnswer(BaseModel):
73
+ question: str = Field()
74
+ answer: str = Field()
75
+
76
# Parser built around FinalAnswer; only its format instructions are used here,
# to tell the model which JSON layout to reply with.
parser = PydanticOutputParser(pydantic_object=FinalAnswer)

# System prompt. It has three placeholders: {context}, {question} and
# {format_instructions}; the last one is bound once below via partial_variables.
# Typos in the prompt text ("anwer", "fromado") are corrected because the model
# may repeat this wording to users.
template = """
Your name is Greta and you are a recycling chatbot with the objective to answer questions from user in English or Spanish /
Has sido diseñado y creado por el Grupo 1 del Máster en Data Science & Big Data de la promoción 2023/2024 de la Universidad Complutense de Madrid. Este grupo está formado por Rocío, María Guillermo, Alejandra, Paloma y Álvaro /
Use the following pieces of context to answer the question /
If the question is English answer in English /
If the question is Spanish answer in Spanish /
Do not mention the word context when you answer a question /
Answer the question fully and provide as much relevant detail as possible. Do not cut your response short /
Context: {context}
User: {question}
{format_instructions}
"""

# Chat prompt: the system message above followed by the user's question as the
# human message.
sys_prompt = SystemMessagePromptTemplate.from_template(template)
qa_prompt = ChatPromptTemplate(
    messages=[
        sys_prompt,
        HumanMessagePromptTemplate.from_template("{question}")],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Hosted text-generation model with its sampling settings.
llm = HuggingFaceHub(
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    task="text-generation",
    model_kwargs={
        "max_new_tokens": 2000,
        "top_k": 30,
        "temperature": 0.1,
        "repetition_penalty": 1.03
    },
)

# Conversational retrieval chain: combines the retriever, the custom prompt and
# a buffer memory. Input is read from 'question' and the result is published
# under 'output'; the memory is configured with the same two keys.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm = llm,
    memory = ConversationBufferMemory(llm=llm, memory_key="chat_history", input_key='question', output_key='output'),
    retriever = retriever,
    verbose = True,
    combine_docs_chain_kwargs={'prompt': qa_prompt},
    # Hand the stored history to the chain unchanged.
    get_chat_history = lambda h : h,
    rephrase_question = False,
    output_key = 'output',
)
121
+
122
def _extract_answer(output_string):
    """Return the value of the last ``"answer"`` field in the model's raw text."""
    import json

    marker = '"answer":'
    marker_index = output_string.rfind(marker)
    if marker_index == -1:
        # The model did not follow the JSON layout; show its text as-is
        # instead of slicing at a meaningless offset.
        return output_string.strip()

    remainder = output_string[marker_index + len(marker):].strip()
    opening_quote = remainder.find('"')
    if opening_quote == -1:
        # No quoted value after the marker; fall back to whatever text exists.
        return remainder or output_string.strip()

    try:
        # Decode a proper JSON string so escaped quotes and escape sequences
        # inside the answer are handled. strict=False tolerates the raw line
        # breaks that generated text often contains.
        value, _ = json.JSONDecoder(strict=False).raw_decode(remainder, opening_quote)
        if isinstance(value, str):
            return value
    except ValueError:
        pass

    # Malformed or truncated JSON: take the text up to the next quote, or
    # everything that is left when the closing quote never arrived.
    closing_quote = remainder.find('"', opening_quote + 1)
    if closing_quote == -1:
        return remainder[opening_quote + 1:]
    return remainder[opening_quote + 1:closing_quote]


def chat_interface(question, history, audio=None):
    """Answer one chat message using the retrieval chain.

    Args:
        question: Text typed by the user.
        history: Conversation so far as supplied by the chat UI. Not used
            here; the chain is invoked with the question only.
        audio: Value of the additional audio input registered on the chat UI.
            It is accepted so the callback matches the number of arguments
            the UI passes, but it is currently ignored.

    Returns:
        The answer text extracted from the chain's ``'output'`` string.
    """
    result = qa_chain.invoke({'question': question})
    return _extract_answer(result['output'])
139
+
140
+
141
# Chat UI: every message is routed to chat_interface. A microphone audio
# component is registered as an additional input. The custom Theme instance
# created near the top of the file is passed in here; previously it was built
# but never applied to the interface.
chatbot_gradio_app = gr.ChatInterface(
    fn=chat_interface,
    additional_inputs=gr.Audio(sources=["microphone"]),
    title=custom_title,
    theme=theme,
)

# Enable the request queue, then start the server.
chatbot_gradio_app.queue()
chatbot_gradio_app.launch()
front_4.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers==4.28.0
2
+ torch
3
+ langchain
4
+ langchain-community
5
+ pypdf
6
+ pydantic
7
+ sentence-transformers
8
+ chromadb
9
+ gradio
10
+ beautifulsoup4
11
+ tensorflow
theme.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable
4
+
5
+ from gradio.themes.base import Base
6
+ from gradio.themes.utils import colors, fonts, sizes
7
+
8
+
9
class Theme(Base):
    """Custom Gradio theme for the app.

    By default it uses the lime / emerald / stone hues, large spacing, no
    corner radius, medium text, and the Quicksand and IBM Plex Mono Google
    fonts. After the base theme is initialised, a fixed set of colour,
    padding, border and label variables is overridden.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.lime,
        secondary_hue: colors.Color | str = colors.emerald,
        neutral_hue: colors.Color | str = colors.stone,
        spacing_size: sizes.Size | str = sizes.spacing_lg,
        radius_size: sizes.Size | str = sizes.radius_none,
        text_size: sizes.Size | str = sizes.text_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "Consolas",
            "monospace",
        ),
    ):
        """Initialise the base theme, then apply this theme's overrides.

        All arguments are keyword-only and are forwarded unchanged to the
        base class constructor.
        """
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "spacing_size": spacing_size,
            "radius_size": radius_size,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)
        self.name = "theme"

        # Colors
        color_overrides = {
            "slider_color": "*neutral_900",
            "slider_color_dark": "*neutral_500",
            "body_text_color": "rgb(18,13,5)",
            "block_label_text_color": "rgb(243, 239, 224)",
            "block_title_text_color": "rgb(243, 239, 224)",
            "body_text_color_subdued": "*neutral_400",
            "body_background_fill": '*primary_800',
            "background_fill_primary": '*primary_600',
            "background_fill_primary_dark": '*primary_900',
            "background_fill_secondary_dark": '*primary_900',
            "block_background_fill": 'rgb(53,66,48)',
            "block_background_fill_dark": "*neutral_800",
            "input_background_fill_dark": "*neutral_700",
        }

        # Button Colors (secondary, cancel and checkbox-label variants reuse
        # the primary button variables by reference)
        button_overrides = {
            "button_primary_background_fill": "rgb(53,66,48)",
            "button_primary_background_fill_hover": '*primary_200',
            "button_primary_text_color": '*primary_600',
            "button_primary_background_fill_dark": "*neutral_600",
            "button_primary_background_fill_hover_dark": "*neutral_600",
            "button_primary_text_color_dark": "white",
            "button_secondary_background_fill": "*button_primary_background_fill",
            "button_secondary_background_fill_hover": "*button_primary_background_fill_hover",
            "button_secondary_text_color": "*button_primary_text_color",
            "button_cancel_background_fill": "*button_primary_background_fill",
            "button_cancel_background_fill_hover": "*button_primary_background_fill_hover",
            "button_cancel_text_color": "*button_primary_text_color",
            "checkbox_label_background_fill": "*button_primary_background_fill",
            "checkbox_label_background_fill_hover": "*button_primary_background_fill_hover",
            "checkbox_label_text_color": "*button_primary_text_color",
            "checkbox_background_color_selected": "*neutral_600",
            "checkbox_background_color_dark": "*neutral_700",
            "checkbox_background_color_selected_dark": "*neutral_700",
            "checkbox_border_color_selected_dark": "*neutral_800",
        }

        # Padding
        padding_overrides = {
            "checkbox_label_padding": "*spacing_md",
            "button_large_padding": "*spacing_lg",
            "button_small_padding": "*spacing_sm",
        }

        # Borders
        border_overrides = {
            "block_border_width": "0px",
            "block_border_width_dark": "1px",
            "shadow_drop_lg": "0 1px 4px 0 rgb(0 0 0 / 0.1)",
            "block_shadow": "*shadow_drop_lg",
            "block_shadow_dark": "none",
        }

        # Block Labels
        label_overrides = {
            "block_title_text_weight": "600",
            "block_label_text_weight": "600",
            "block_label_text_size": "*text_md",
        }

        # Single set() call; the groups are expanded in the same order as the
        # sections above.
        super().set(
            **color_overrides,
            **button_overrides,
            **padding_overrides,
            **border_overrides,
            **label_overrides,
        )