paloma99 committed on
Commit
9cdf6e8
1 Parent(s): 0459382

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ pdfs/Acceptable-Ways-to-separate-and-dispose-of-garbage-and-recyclables.pdf filter=lfs diff=lfs merge=lfs -text
37
+ pdfs/Advice-on-recycling-and-resource-recovery-FINAL-REPORT.pdf filter=lfs diff=lfs merge=lfs -text
38
+ pdfs/Guidance[[:space:]]on[[:space:]]municipal[[:space:]]waste[[:space:]]data[[:space:]]collection.pdf filter=lfs diff=lfs merge=lfs -text
39
+ pdfs/mygov-999999999489028046.pdf filter=lfs diff=lfs merge=lfs -text
Dockerfile.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . .
10
+
11
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ import theme
5
+
6
+ theme = theme.Theme()
7
+
8
+ import os
9
+ import sys
10
+ sys.path.append('../..')
11
+
12
+ #langchain
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
14
+ from langchain.embeddings import HuggingFaceEmbeddings
15
+ from langchain.prompts import PromptTemplate
16
+ from langchain.chains import RetrievalQA
17
+ from langchain.prompts import ChatPromptTemplate
18
+ from langchain.schema import StrOutputParser
19
+ from langchain.schema.runnable import Runnable
20
+ from langchain.schema.runnable.config import RunnableConfig
21
+ from langchain.chains import (
22
+ LLMChain, ConversationalRetrievalChain)
23
+ from langchain.vectorstores import Chroma
24
+ from langchain.memory import ConversationBufferMemory
25
+ from langchain.chains import LLMChain
26
+ from langchain.prompts.prompt import PromptTemplate
27
+ from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate
28
+ from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder
29
+ from langchain.document_loaders import PyPDFDirectoryLoader
30
+ from pydantic import BaseModel, Field
31
+ from langchain.output_parsers import PydanticOutputParser
32
+ from langchain_community.llms import HuggingFaceHub
33
+ from langchain_community.document_loaders import WebBaseLoader
34
+
35
+ from pydantic import BaseModel
36
+ import shutil
37
+
38
+
39
+
40
+ custom_title = "<span style='color: rgb(243, 239, 224);'>Green Greta</span>"
41
+
42
+
43
+ # Cell 1: Image Classification Model
44
+ image_pipeline = pipeline(task="image-classification", model="guillen/vit-basura-test1")
45
+
46
+ def predict_image(input_img):
47
+ predictions = image_pipeline(input_img)
48
+ return {p["label"]: p["score"] for p in predictions}
49
+
50
+ image_gradio_app = gr.Interface(
51
+ fn=predict_image,
52
+ inputs=gr.Image(label="Image", sources=['upload', 'webcam'], type="pil"),
53
+ outputs=[gr.Label(label="Result")],
54
+ title=custom_title,
55
+ theme=theme
56
+ )
57
+
58
+ loader = WebBaseLoader(["https://www.epa.gov/recycle/frequent-questions-recycling", "https://www.whitehorsedc.gov.uk/vale-of-white-horse-district-council/recycling-rubbish-and-waste/lets-get-real-about-recycling/", "https://www.teimas.com/blog/13-preguntas-y-respuestas-sobre-la-ley-de-residuos-07-2022", "https://www.molok.com/es/blog/gestion-de-residuos-solidos-urbanos-rsu-10-dudas-comunes"])
59
+ data=loader.load()
60
+ # split documents
61
+ text_splitter = RecursiveCharacterTextSplitter(
62
+ chunk_size=1024,
63
+ chunk_overlap=150,
64
+ length_function=len
65
+ )
66
+ docs = text_splitter.split_documents(data)
67
+ # define embedding
68
+ embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
69
+ # create vector database from data
70
+ persist_directory = 'docs/chroma/'
71
+
72
+ # Remove old database files if any
73
+ shutil.rmtree(persist_directory, ignore_errors=True)
74
+ vectordb = Chroma.from_documents(
75
+ documents=docs,
76
+ embedding=embeddings,
77
+ persist_directory=persist_directory
78
+ )
79
+ # define retriever
80
+ retriever = vectordb.as_retriever(search_kwargs={"k": 2}, search_type="mmr")
81
+
82
+ class FinalAnswer(BaseModel):
83
+ question: str = Field(description="the original question")
84
+ answer: str = Field(description="the extracted answer")
85
+
86
+ # Assuming you have a parser for the FinalAnswer class
87
+ parser = PydanticOutputParser(pydantic_object=FinalAnswer)
88
+
89
+ template = """
90
+ Your name is Greta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
91
+ Use the following pieces of context to answer the question /
92
+ If the question is English answer in English /
93
+ If the question is Spanish answer in Spanish /
94
+ Do not mention the word context when you answer a question /
95
+ Answer the question fully and provide as much relevant detail as possible. Do not cut your response short /
96
+ Context: {context}
97
+ User: {question}
98
+ {format_instructions}
99
+ """
100
+
101
+ # Create the chat prompt templates
102
+ sys_prompt = SystemMessagePromptTemplate.from_template(template)
103
+ qa_prompt = ChatPromptTemplate(
104
+ messages=[
105
+ sys_prompt,
106
+ HumanMessagePromptTemplate.from_template("{question}")],
107
+ partial_variables={"format_instructions": parser.get_format_instructions()}
108
+ )
109
+ llm = HuggingFaceHub(
110
+ repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
111
+ task="text-generation",
112
+ model_kwargs={
113
+ "max_new_tokens": 2000,
114
+ "top_k": 30,
115
+ "temperature": 0.1,
116
+ "repetition_penalty": 1.03
117
+ },
118
+ )
119
+
120
+ qa_chain = ConversationalRetrievalChain.from_llm(
121
+ llm = llm,
122
+ memory = ConversationBufferMemory(llm=llm, memory_key="chat_history", input_key='question', output_key='output'),
123
+ retriever = retriever,
124
+ verbose = True,
125
+ combine_docs_chain_kwargs={'prompt': qa_prompt},
126
+ get_chat_history = lambda h : h,
127
+ rephrase_question = False,
128
+ output_key = 'output',
129
+ )
130
+
131
+ def chat_interface(question,history):
132
+ result = qa_chain.invoke({'question': question})
133
+ output_string = result['output']
134
+
135
+ # Find the index of the last occurrence of "answer": in the string
136
+ answer_index = output_string.rfind('"answer":')
137
+
138
+ # Extract the substring starting from the "answer": index
139
+ answer_part = output_string[answer_index + len('"answer":'):].strip()
140
+
141
+ # Find the next occurrence of a double quote to get the start of the answer value
142
+ quote_index = answer_part.find('"')
143
+
144
+ # Extract the answer value between double quotes
145
+ answer_value = answer_part[quote_index + 1:answer_part.find('"', quote_index + 1)]
146
+
147
+ return answer_value
148
+
149
+
150
+ chatbot_gradio_app = gr.ChatInterface(
151
+ fn=chat_interface,
152
+ title=custom_title
153
+ )
154
+
155
+ # Combine both interfaces into a single app
156
+ app = gr.TabbedInterface(
157
+ [image_gradio_app, chatbot_gradio_app],
158
+ tab_names=["Green Greta Image Classification","Green Greta Chat"],
159
+ theme=theme
160
+ )
161
+
162
+ app.queue()
163
+ app.launch()
pdfs/Acceptable-Ways-to-separate-and-dispose-of-garbage-and-recyclables.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17ded317a5f64e275957fe163573c814d32db17f1dd266bd1053c1b3fc156550
3
+ size 5411060
pdfs/Advice-on-recycling-and-resource-recovery-FINAL-REPORT.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a171f4d8d68655f216906aff6a7227483d6106df1d86ea6d7186c0832dafb73
3
+ size 9129696
pdfs/Guidance on municipal waste data collection.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e72519ac0a1f98c9fdbf355f39d1bd15ec1c1d56b6f760808f1a3e4b63e697f6
3
+ size 2500966
pdfs/mygov-999999999489028046.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d77c23ca4040122f8de56bbe113260f46da1621ba8e5bf5ce7de87dde447173b
3
+ size 1434758
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ langchain
4
+ langchain-community
5
+ pypdf
6
+ pydantic
7
+ sentence-transformers
8
+ chromadb
9
+ gradio
10
+ beautifulsoup4
theme.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable
4
+
5
+ from gradio.themes.base import Base
6
+ from gradio.themes.utils import colors, fonts, sizes
7
+
8
+
9
+ class Theme(Base):
10
+ def __init__(
11
+ self,
12
+ *,
13
+ primary_hue: colors.Color | str = colors.lime,
14
+ secondary_hue: colors.Color | str = colors.emerald,
15
+ neutral_hue: colors.Color | str = colors.stone,
16
+ spacing_size: sizes.Size | str = sizes.spacing_lg,
17
+ radius_size: sizes.Size | str = sizes.radius_none,
18
+ text_size: sizes.Size | str = sizes.text_md,
19
+ font: fonts.Font | str | Iterable[fonts.Font | str] = (
20
+ fonts.GoogleFont("Quicksand"),
21
+ "ui-sans-serif",
22
+ "system-ui",
23
+ "sans-serif",
24
+ ),
25
+ font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
26
+ fonts.GoogleFont("IBM Plex Mono"),
27
+ "ui-monospace",
28
+ "Consolas",
29
+ "monospace",
30
+ ),
31
+ ):
32
+ super().__init__(
33
+ primary_hue=primary_hue,
34
+ secondary_hue=secondary_hue,
35
+ neutral_hue=neutral_hue,
36
+ spacing_size=spacing_size,
37
+ radius_size=radius_size,
38
+ text_size=text_size,
39
+ font=font,
40
+ font_mono=font_mono,
41
+ )
42
+ self.name = "theme"
43
+ super().set(
44
+ # Colors
45
+ slider_color="*neutral_900",
46
+ slider_color_dark="*neutral_500",
47
+ body_text_color="rgb(18,13,5)",
48
+ block_label_text_color="rgb(243, 239, 224)",
49
+ block_title_text_color="rgb(243, 239, 224)",
50
+ body_text_color_subdued="*neutral_400",
51
+ body_background_fill='*primary_800',
52
+ background_fill_primary='*primary_600',
53
+ background_fill_primary_dark='*primary_900',
54
+ background_fill_secondary_dark='*primary_900',
55
+ block_background_fill='rgb(53,66,48)',
56
+ block_background_fill_dark="*neutral_800",
57
+ input_background_fill_dark="*neutral_700",
58
+ # Button Colors
59
+ button_primary_background_fill="rgb(53,66,48)",
60
+ button_primary_background_fill_hover='*primary_200',
61
+ button_primary_text_color='*primary_600',
62
+ button_primary_background_fill_dark="*neutral_600",
63
+ button_primary_background_fill_hover_dark="*neutral_600",
64
+ button_primary_text_color_dark="white",
65
+ button_secondary_background_fill="*button_primary_background_fill",
66
+ button_secondary_background_fill_hover="*button_primary_background_fill_hover",
67
+ button_secondary_text_color="*button_primary_text_color",
68
+ button_cancel_background_fill="*button_primary_background_fill",
69
+ button_cancel_background_fill_hover="*button_primary_background_fill_hover",
70
+ button_cancel_text_color="*button_primary_text_color",
71
+ checkbox_label_background_fill="*button_primary_background_fill",
72
+ checkbox_label_background_fill_hover="*button_primary_background_fill_hover",
73
+ checkbox_label_text_color="*button_primary_text_color",
74
+ checkbox_background_color_selected="*neutral_600",
75
+ checkbox_background_color_dark="*neutral_700",
76
+ checkbox_background_color_selected_dark="*neutral_700",
77
+ checkbox_border_color_selected_dark="*neutral_800",
78
+ # Padding
79
+ checkbox_label_padding="*spacing_md",
80
+ button_large_padding="*spacing_lg",
81
+ button_small_padding="*spacing_sm",
82
+ # Borders
83
+ block_border_width="0px",
84
+ block_border_width_dark="1px",
85
+ shadow_drop_lg="0 1px 4px 0 rgb(0 0 0 / 0.1)",
86
+ block_shadow="*shadow_drop_lg",
87
+ block_shadow_dark="none",
88
+ # Block Labels
89
+ block_title_text_weight="600",
90
+ block_label_text_weight="600",
91
+ block_label_text_size="*text_md",
92
+ )