Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files- .gitattributes +4 -0
- Dockerfile.txt +11 -0
- app.py +163 -0
- pdfs/Acceptable-Ways-to-separate-and-dispose-of-garbage-and-recyclables.pdf +3 -0
- pdfs/Advice-on-recycling-and-resource-recovery-FINAL-REPORT.pdf +3 -0
- pdfs/Guidance on municipal waste data collection.pdf +3 -0
- pdfs/mygov-999999999489028046.pdf +3 -0
- requirements.txt +10 -0
- theme.py +92 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
pdfs/Acceptable-Ways-to-separate-and-dispose-of-garbage-and-recyclables.pdf filter=lfs diff=lfs merge=lfs -text
|
37 |
+
pdfs/Advice-on-recycling-and-resource-recovery-FINAL-REPORT.pdf filter=lfs diff=lfs merge=lfs -text
|
38 |
+
pdfs/Guidance[[:space:]]on[[:space:]]municipal[[:space:]]waste[[:space:]]data[[:space:]]collection.pdf filter=lfs diff=lfs merge=lfs -text
|
39 |
+
pdfs/mygov-999999999489028046.pdf filter=lfs diff=lfs merge=lfs -text
|
Dockerfile.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Container image that serves the Gradio app on port 7860.
FROM python:3.10

WORKDIR /code

# Install dependencies before copying the sources so this layer stays cached
# when only application code changes.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# app.py calls app.launch() without arguments, so the bind address and port
# are supplied through Gradio's environment variables (the default bind
# address is 127.0.0.1, which is unreachable from outside the container).
ENV GRADIO_SERVER_NAME="0.0.0.0" \
    GRADIO_SERVER_PORT="7860"

EXPOSE 7860

# The upload contains no main.py and no ASGI `main:app` object; app.py is the
# entry point and starts its own server.
CMD ["python", "app.py"]
|
app.py
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
3 |
+
import torch
|
4 |
+
import theme
|
5 |
+
|
6 |
+
theme = theme.Theme()
|
7 |
+
|
8 |
+
import os
|
9 |
+
import sys
|
10 |
+
sys.path.append('../..')
|
11 |
+
|
12 |
+
#langchain
|
13 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
14 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
15 |
+
from langchain.prompts import PromptTemplate
|
16 |
+
from langchain.chains import RetrievalQA
|
17 |
+
from langchain.prompts import ChatPromptTemplate
|
18 |
+
from langchain.schema import StrOutputParser
|
19 |
+
from langchain.schema.runnable import Runnable
|
20 |
+
from langchain.schema.runnable.config import RunnableConfig
|
21 |
+
from langchain.chains import (
|
22 |
+
LLMChain, ConversationalRetrievalChain)
|
23 |
+
from langchain.vectorstores import Chroma
|
24 |
+
from langchain.memory import ConversationBufferMemory
|
25 |
+
from langchain.chains import LLMChain
|
26 |
+
from langchain.prompts.prompt import PromptTemplate
|
27 |
+
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate
|
28 |
+
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder
|
29 |
+
from langchain.document_loaders import PyPDFDirectoryLoader
|
30 |
+
from pydantic import BaseModel, Field
|
31 |
+
from langchain.output_parsers import PydanticOutputParser
|
32 |
+
from langchain_community.llms import HuggingFaceHub
|
33 |
+
from langchain_community.document_loaders import WebBaseLoader
|
34 |
+
|
35 |
+
from pydantic import BaseModel
|
36 |
+
import shutil
|
37 |
+
|
38 |
+
|
39 |
+
|
40 |
+
# HTML snippet reused as the title of the Gradio interfaces.
custom_title = "<span style='color: rgb(243, 239, 224);'>Green Greta</span>"

# Image classification: Hugging Face pipeline loaded once at import time.
image_pipeline = pipeline(
    model="guillen/vit-basura-test1",
    task="image-classification",
)
|
45 |
+
|
46 |
+
def predict_image(input_img):
    """Classify an image and return a mapping of label to score.

    Args:
        input_img: Image passed straight through to ``image_pipeline``.

    Returns:
        dict built from the ``"label"`` and ``"score"`` entries of every
        prediction returned by the pipeline.
    """
    scores_by_label = {}
    for prediction in image_pipeline(input_img):
        scores_by_label[prediction["label"]] = prediction["score"]
    return scores_by_label
|
49 |
+
|
50 |
+
# Image tab: takes an uploaded or webcam picture (as a PIL image) and shows
# the classifier's label scores.
image_gradio_app = gr.Interface(
    title=custom_title,
    theme=theme,
    fn=predict_image,
    inputs=gr.Image(type="pil", label="Image", sources=['upload', 'webcam']),
    outputs=[gr.Label(label="Result")],
)
|
57 |
+
|
58 |
+
# Web pages that make up the chatbot's knowledge base; fetched at import time.
loader = WebBaseLoader(
    [
        "https://www.epa.gov/recycle/frequent-questions-recycling",
        "https://www.whitehorsedc.gov.uk/vale-of-white-horse-district-council/recycling-rubbish-and-waste/lets-get-real-about-recycling/",
        "https://www.teimas.com/blog/13-preguntas-y-respuestas-sobre-la-ley-de-residuos-07-2022",
        "https://www.molok.com/es/blog/gestion-de-residuos-solidos-urbanos-rsu-10-dudas-comunes",
    ]
)
data = loader.load()

# Cut the pages into overlapping chunks, measured in characters via len().
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=150,
    chunk_size=1024,
)
docs = text_splitter.split_documents(data)

# Embedding model used to index the chunks.
embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')

# Rebuild the Chroma store from scratch on every start: any previous
# on-disk database is deleted first (a missing directory is not an error).
persist_directory = 'docs/chroma/'
shutil.rmtree(persist_directory, ignore_errors=True)
vectordb = Chroma.from_documents(
    persist_directory=persist_directory,
    embedding=embeddings,
    documents=docs,
)

# MMR retriever returning two chunks per query.
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2})
|
81 |
+
|
82 |
+
# Structured reply requested from the LLM; handed to PydanticOutputParser to
# generate the format instructions embedded in the prompt.
# NOTE: documented with comments rather than a class docstring on purpose --
# pydantic copies a model docstring into the generated schema, which would
# change the format instructions sent to the model.
class FinalAnswer(BaseModel):
    # The user's question, echoed back by the model.
    question: str = Field(..., description="the original question")
    # The model's answer text.
    answer: str = Field(..., description="the extracted answer")
|
85 |
+
|
86 |
+
# Assuming you have a parser for the FinalAnswer class
|
87 |
+
# Parser used only for its format instructions, which ask the model to reply
# with a FinalAnswer-shaped JSON object.
parser = PydanticOutputParser(pydantic_object=FinalAnswer)

# System prompt: persona and answering rules, followed by the retrieved
# context, the user's question and the parser's format instructions.
template = """
Your name is Greta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
Use the following pieces of context to answer the question /
If the question is English answer in English /
If the question is Spanish answer in Spanish /
Do not mention the word context when you answer a question /
Answer the question fully and provide as much relevant detail as possible. Do not cut your response short /
Context: {context}
User: {question}
{format_instructions}
"""

# Chat prompt = system message above + the bare question as the human turn.
# format_instructions is pre-filled, so callers only supply context/question.
sys_prompt = SystemMessagePromptTemplate.from_template(template)
qa_prompt = ChatPromptTemplate(
    partial_variables={"format_instructions": parser.get_format_instructions()},
    messages=[
        sys_prompt,
        HumanMessagePromptTemplate.from_template("{question}"),
    ],
)
|
109 |
+
# Hosted text-generation model queried through the Hugging Face Hub.
llm = HuggingFaceHub(
    task="text-generation",
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    model_kwargs={
        "temperature": 0.1,
        "top_k": 30,
        "repetition_penalty": 1.03,
        "max_new_tokens": 2000,
    },
)

# Retrieval chain with conversation memory. The chat history is passed
# through unchanged, the question is not rephrased before answering, and the
# reply is exposed under the 'output' key (also the key the memory records).
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=ConversationBufferMemory(
        llm=llm,
        memory_key="chat_history",
        input_key='question',
        output_key='output',
    ),
    combine_docs_chain_kwargs={'prompt': qa_prompt},
    get_chat_history=lambda history: history,
    rephrase_question=False,
    output_key='output',
    verbose=True,
)
|
130 |
+
|
131 |
+
def _extract_answer(output_string):
    """Return the ``answer`` value embedded in raw LLM output.

    Strategies, in order:

    1. Decode the last JSON object in the text whose top-level ``"answer"``
       is a string (the last one is preferred, as the previous
       implementation keyed on the last ``"answer":`` marker).
    2. Fall back to the last ``"answer": "..."`` fragment, which also covers
       JSON that was cut off before its closing quote or brace.
    3. If neither is present, return the stripped text unchanged instead of
       slicing at an arbitrary offset.
    """
    import json
    import re

    decoder = json.JSONDecoder()
    brace = output_string.rfind("{")
    while brace != -1:
        try:
            candidate, _ = decoder.raw_decode(output_string, brace)
        except ValueError:
            # Not valid JSON from this brace (e.g. a brace inside a string).
            candidate = None
        if isinstance(candidate, dict) and isinstance(candidate.get("answer"), str):
            return candidate["answer"]
        brace = output_string.rfind("{", 0, brace)

    # Escaped quotes (\") are consumed as part of the value rather than
    # being mistaken for the end of the string.
    fragments = re.findall(
        r'"answer"\s*:\s*"((?:[^"\\]|\\.)*)', output_string, flags=re.DOTALL
    )
    if fragments:
        raw_value = fragments[-1]
        try:
            # strict=False tolerates literal newlines inside the value.
            return json.loads('"' + raw_value + '"', strict=False)
        except ValueError:
            return raw_value

    return output_string.strip()


def chat_interface(question, history):
    """Answer a chat message for the Gradio chat UI.

    Args:
        question: The user's latest message.
        history: Conversation history supplied by Gradio. Unused here; the
            chain is configured with its own conversation memory.

    Returns:
        The answer text extracted from the chain's ``'output'`` value.
    """
    result = qa_chain.invoke({'question': question})
    return _extract_answer(result['output'])
|
148 |
+
|
149 |
+
|
150 |
+
# Chat tab backed by chat_interface.
chatbot_gradio_app = gr.ChatInterface(title=custom_title, fn=chat_interface)

# Single app exposing the classifier and the chatbot as two tabs.
app = gr.TabbedInterface(
    [image_gradio_app, chatbot_gradio_app],
    theme=theme,
    tab_names=["Green Greta Image Classification","Green Greta Chat"],
)

# Enable request queuing, then start the server.
app.queue()
app.launch()
|
pdfs/Acceptable-Ways-to-separate-and-dispose-of-garbage-and-recyclables.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17ded317a5f64e275957fe163573c814d32db17f1dd266bd1053c1b3fc156550
|
3 |
+
size 5411060
|
pdfs/Advice-on-recycling-and-resource-recovery-FINAL-REPORT.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a171f4d8d68655f216906aff6a7227483d6106df1d86ea6d7186c0832dafb73
|
3 |
+
size 9129696
|
pdfs/Guidance on municipal waste data collection.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e72519ac0a1f98c9fdbf355f39d1bd15ec1c1d56b6f760808f1a3e4b63e697f6
|
3 |
+
size 2500966
|
pdfs/mygov-999999999489028046.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d77c23ca4040122f8de56bbe113260f46da1621ba8e5bf5ce7de87dde447173b
|
3 |
+
size 1434758
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
torch
|
3 |
+
langchain
|
4 |
+
langchain-community
|
5 |
+
pypdf
|
6 |
+
pydantic
|
7 |
+
sentence-transformers
|
8 |
+
chromadb
|
9 |
+
gradio
|
10 |
+
beautifulsoup4
|
theme.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
from typing import Iterable
|
4 |
+
|
5 |
+
from gradio.themes.base import Base
|
6 |
+
from gradio.themes.utils import colors, fonts, sizes
|
7 |
+
|
8 |
+
|
9 |
+
class Theme(Base):
    """Custom Gradio theme: lime/emerald/stone hues with square corners.

    All constructor arguments are keyword-only and forwarded unchanged to
    ``Base.__init__``; a fixed set of variable overrides is applied
    afterwards through ``Base.set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.lime,
        secondary_hue: colors.Color | str = colors.emerald,
        neutral_hue: colors.Color | str = colors.stone,
        spacing_size: sizes.Size | str = sizes.spacing_lg,
        radius_size: sizes.Size | str = sizes.radius_none,
        text_size: sizes.Size | str = sizes.text_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "Consolas",
            "monospace",
        ),
    ):
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        self.name = "theme"

        # Text, background and slider colors.
        color_overrides = {
            "slider_color": "*neutral_900",
            "slider_color_dark": "*neutral_500",
            "body_text_color": "rgb(18,13,5)",
            "block_label_text_color": "rgb(243, 239, 224)",
            "block_title_text_color": "rgb(243, 239, 224)",
            "body_text_color_subdued": "*neutral_400",
            "body_background_fill": '*primary_800',
            "background_fill_primary": '*primary_600',
            "background_fill_primary_dark": '*primary_900',
            "background_fill_secondary_dark": '*primary_900',
            "block_background_fill": 'rgb(53,66,48)',
            "block_background_fill_dark": "*neutral_800",
            "input_background_fill_dark": "*neutral_700",
        }

        # Buttons and checkboxes; secondary/cancel buttons and checkbox
        # labels point at the primary button's values.
        button_overrides = {
            "button_primary_background_fill": "rgb(53,66,48)",
            "button_primary_background_fill_hover": '*primary_200',
            "button_primary_text_color": '*primary_600',
            "button_primary_background_fill_dark": "*neutral_600",
            "button_primary_background_fill_hover_dark": "*neutral_600",
            "button_primary_text_color_dark": "white",
            "button_secondary_background_fill": "*button_primary_background_fill",
            "button_secondary_background_fill_hover": "*button_primary_background_fill_hover",
            "button_secondary_text_color": "*button_primary_text_color",
            "button_cancel_background_fill": "*button_primary_background_fill",
            "button_cancel_background_fill_hover": "*button_primary_background_fill_hover",
            "button_cancel_text_color": "*button_primary_text_color",
            "checkbox_label_background_fill": "*button_primary_background_fill",
            "checkbox_label_background_fill_hover": "*button_primary_background_fill_hover",
            "checkbox_label_text_color": "*button_primary_text_color",
            "checkbox_background_color_selected": "*neutral_600",
            "checkbox_background_color_dark": "*neutral_700",
            "checkbox_background_color_selected_dark": "*neutral_700",
            "checkbox_border_color_selected_dark": "*neutral_800",
        }

        # Padding.
        padding_overrides = {
            "checkbox_label_padding": "*spacing_md",
            "button_large_padding": "*spacing_lg",
            "button_small_padding": "*spacing_sm",
        }

        # Borders and shadows.
        border_overrides = {
            "block_border_width": "0px",
            "block_border_width_dark": "1px",
            "shadow_drop_lg": "0 1px 4px 0 rgb(0 0 0 / 0.1)",
            "block_shadow": "*shadow_drop_lg",
            "block_shadow_dark": "none",
        }

        # Block title / label typography.
        label_overrides = {
            "block_title_text_weight": "600",
            "block_label_text_weight": "600",
            "block_label_text_size": "*text_md",
        }

        super().set(
            **color_overrides,
            **button_overrides,
            **padding_overrides,
            **border_overrides,
            **label_overrides,
        )
|