paloma99 committed on
Commit
9cdf6e8
1 Parent(s): 0459382

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ pdfs/Acceptable-Ways-to-separate-and-dispose-of-garbage-and-recyclables.pdf filter=lfs diff=lfs merge=lfs -text
37
+ pdfs/Advice-on-recycling-and-resource-recovery-FINAL-REPORT.pdf filter=lfs diff=lfs merge=lfs -text
38
+ pdfs/Guidance[[:space:]]on[[:space:]]municipal[[:space:]]waste[[:space:]]data[[:space:]]collection.pdf filter=lfs diff=lfs merge=lfs -text
39
+ pdfs/mygov-999999999489028046.pdf filter=lfs diff=lfs merge=lfs -text
Dockerfile.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . .
10
+
11
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ import theme
5
+
6
+ theme = theme.Theme()
7
+
8
+ import os
9
+ import sys
10
+ sys.path.append('../..')
11
+
12
+ #langchain
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
14
+ from langchain.embeddings import HuggingFaceEmbeddings
15
+ from langchain.prompts import PromptTemplate
16
+ from langchain.chains import RetrievalQA
17
+ from langchain.prompts import ChatPromptTemplate
18
+ from langchain.schema import StrOutputParser
19
+ from langchain.schema.runnable import Runnable
20
+ from langchain.schema.runnable.config import RunnableConfig
21
+ from langchain.chains import (
22
+ LLMChain, ConversationalRetrievalChain)
23
+ from langchain.vectorstores import Chroma
24
+ from langchain.memory import ConversationBufferMemory
25
+ from langchain.chains import LLMChain
26
+ from langchain.prompts.prompt import PromptTemplate
27
+ from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate
28
+ from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder
29
+ from langchain.document_loaders import PyPDFDirectoryLoader
30
+ from pydantic import BaseModel, Field
31
+ from langchain.output_parsers import PydanticOutputParser
32
+ from langchain_community.llms import HuggingFaceHub
33
+ from langchain_community.document_loaders import WebBaseLoader
34
+
35
+ from pydantic import BaseModel
36
+ import shutil
37
+
38
+
39
+
40
+ custom_title = "<span style='color: rgb(243, 239, 224);'>Green Greta</span>"
41
+
42
+
43
+ # Cell 1: Image Classification Model
44
+ image_pipeline = pipeline(task="image-classification", model="guillen/vit-basura-test1")
45
+
46
+ def predict_image(input_img):
47
+ predictions = image_pipeline(input_img)
48
+ return {p["label"]: p["score"] for p in predictions}
49
+
50
+ image_gradio_app = gr.Interface(
51
+ fn=predict_image,
52
+ inputs=gr.Image(label="Image", sources=['upload', 'webcam'], type="pil"),
53
+ outputs=[gr.Label(label="Result")],
54
+ title=custom_title,
55
+ theme=theme
56
+ )
57
+
58
+ loader = WebBaseLoader(["https://www.epa.gov/recycle/frequent-questions-recycling", "https://www.whitehorsedc.gov.uk/vale-of-white-horse-district-council/recycling-rubbish-and-waste/lets-get-real-about-recycling/", "https://www.teimas.com/blog/13-preguntas-y-respuestas-sobre-la-ley-de-residuos-07-2022", "https://www.molok.com/es/blog/gestion-de-residuos-solidos-urbanos-rsu-10-dudas-comunes"])
59
+ data=loader.load()
60
+ # split documents
61
+ text_splitter = RecursiveCharacterTextSplitter(
62
+ chunk_size=1024,
63
+ chunk_overlap=150,
64
+ length_function=len
65
+ )
66
+ docs = text_splitter.split_documents(data)
67
+ # define embedding
68
+ embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
69
+ # create vector database from data
70
+ persist_directory = 'docs/chroma/'
71
+
72
+ # Remove old database files if any
73
+ shutil.rmtree(persist_directory, ignore_errors=True)
74
+ vectordb = Chroma.from_documents(
75
+ documents=docs,
76
+ embedding=embeddings,
77
+ persist_directory=persist_directory
78
+ )
79
+ # define retriever
80
+ retriever = vectordb.as_retriever(search_kwargs={"k": 2}, search_type="mmr")
81
+
82
+ class FinalAnswer(BaseModel):
83
+ question: str = Field(description="the original question")
84
+ answer: str = Field(description="the extracted answer")
85
+
86
+ # Assuming you have a parser for the FinalAnswer class
87
+ parser = PydanticOutputParser(pydantic_object=FinalAnswer)
88
+
89
+ template = """
90
+ Your name is Greta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
91
+ Use the following pieces of context to answer the question /
92
+ If the question is English answer in English /
93
+ If the question is Spanish answer in Spanish /
94
+ Do not mention the word context when you answer a question /
95
+ Answer the question fully and provide as much relevant detail as possible. Do not cut your response short /
96
+ Context: {context}
97
+ User: {question}
98
+ {format_instructions}
99
+ """
100
+
101
+ # Create the chat prompt templates
102
+ sys_prompt = SystemMessagePromptTemplate.from_template(template)
103
+ qa_prompt = ChatPromptTemplate(
104
+ messages=[
105
+ sys_prompt,
106
+ HumanMessagePromptTemplate.from_template("{question}")],
107
+ partial_variables={"format_instructions": parser.get_format_instructions()}
108
+ )
109
+ llm = HuggingFaceHub(
110
+ repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
111
+ task="text-generation",
112
+ model_kwargs={
113
+ "max_new_tokens": 2000,
114
+ "top_k": 30,
115
+ "temperature": 0.1,
116
+ "repetition_penalty": 1.03
117
+ },
118
+ )
119
+
120
+ qa_chain = ConversationalRetrievalChain.from_llm(
121
+ llm = llm,
122
+ memory = ConversationBufferMemory(llm=llm, memory_key="chat_history", input_key='question', output_key='output'),
123
+ retriever = retriever,
124
+ verbose = True,
125
+ combine_docs_chain_kwargs={'prompt': qa_prompt},
126
+ get_chat_history = lambda h : h,
127
+ rephrase_question = False,
128
+ output_key = 'output',
129
+ )
130
+
131
+ def chat_interface(question,history):
132
+ result = qa_chain.invoke({'question': question})
133
+ output_string = result['output']
134
+
135
+ # Find the index of the last occurrence of "answer": in the string
136
+ answer_index = output_string.rfind('"answer":')
137
+
138
+ # Extract the substring starting from the "answer": index
139
+ answer_part = output_string[answer_index + len('"answer":'):].strip()
140
+
141
+ # Find the next occurrence of a double quote to get the start of the answer value
142
+ quote_index = answer_part.find('"')
143
+
144
+ # Extract the answer value between double quotes
145
+ answer_value = answer_part[quote_index + 1:answer_part.find('"', quote_index + 1)]
146
+
147
+ return answer_value
148
+
149
+
150
+ chatbot_gradio_app = gr.ChatInterface(
151
+ fn=chat_interface,
152
+ title=custom_title
153
+ )
154
+
155
+ # Combine both interfaces into a single app
156
+ app = gr.TabbedInterface(
157
+ [image_gradio_app, chatbot_gradio_app],
158
+ tab_names=["Green Greta Image Classification","Green Greta Chat"],
159
+ theme=theme
160
+ )
161
+
162
+ app.queue()
163
+ app.launch()
pdfs/Acceptable-Ways-to-separate-and-dispose-of-garbage-and-recyclables.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17ded317a5f64e275957fe163573c814d32db17f1dd266bd1053c1b3fc156550
3
+ size 5411060
pdfs/Advice-on-recycling-and-resource-recovery-FINAL-REPORT.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a171f4d8d68655f216906aff6a7227483d6106df1d86ea6d7186c0832dafb73
3
+ size 9129696
pdfs/Guidance on municipal waste data collection.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e72519ac0a1f98c9fdbf355f39d1bd15ec1c1d56b6f760808f1a3e4b63e697f6
3
+ size 2500966
pdfs/mygov-999999999489028046.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d77c23ca4040122f8de56bbe113260f46da1621ba8e5bf5ce7de87dde447173b
3
+ size 1434758
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ langchain
4
+ langchain-community
5
+ pypdf
6
+ pydantic
7
+ sentence-transformers
8
+ chromadb
9
+ gradio
10
+ beautifulsoup4
theme.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable
4
+
5
+ from gradio.themes.base import Base
6
+ from gradio.themes.utils import colors, fonts, sizes
7
+
8
+
9
+ class Theme(Base):
10
+ def __init__(
11
+ self,
12
+ *,
13
+ primary_hue: colors.Color | str = colors.lime,
14
+ secondary_hue: colors.Color | str = colors.emerald,
15
+ neutral_hue: colors.Color | str = colors.stone,
16
+ spacing_size: sizes.Size | str = sizes.spacing_lg,
17
+ radius_size: sizes.Size | str = sizes.radius_none,
18
+ text_size: sizes.Size | str = sizes.text_md,
19
+ font: fonts.Font | str | Iterable[fonts.Font | str] = (
20
+ fonts.GoogleFont("Quicksand"),
21
+ "ui-sans-serif",
22
+ "system-ui",
23
+ "sans-serif",
24
+ ),
25
+ font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
26
+ fonts.GoogleFont("IBM Plex Mono"),
27
+ "ui-monospace",
28
+ "Consolas",
29
+ "monospace",
30
+ ),
31
+ ):
32
+ super().__init__(
33
+ primary_hue=primary_hue,
34
+ secondary_hue=secondary_hue,
35
+ neutral_hue=neutral_hue,
36
+ spacing_size=spacing_size,
37
+ radius_size=radius_size,
38
+ text_size=text_size,
39
+ font=font,
40
+ font_mono=font_mono,
41
+ )
42
+ self.name = "theme"
43
+ super().set(
44
+ # Colors
45
+ slider_color="*neutral_900",
46
+ slider_color_dark="*neutral_500",
47
+ body_text_color="rgb(18,13,5)",
48
+ block_label_text_color="rgb(243, 239, 224)",
49
+ block_title_text_color="rgb(243, 239, 224)",
50
+ body_text_color_subdued="*neutral_400",
51
+ body_background_fill='*primary_800',
52
+ background_fill_primary='*primary_600',
53
+ background_fill_primary_dark='*primary_900',
54
+ background_fill_secondary_dark='*primary_900',
55
+ block_background_fill='rgb(53,66,48)',
56
+ block_background_fill_dark="*neutral_800",
57
+ input_background_fill_dark="*neutral_700",
58
+ # Button Colors
59
+ button_primary_background_fill="rgb(53,66,48)",
60
+ button_primary_background_fill_hover='*primary_200',
61
+ button_primary_text_color='*primary_600',
62
+ button_primary_background_fill_dark="*neutral_600",
63
+ button_primary_background_fill_hover_dark="*neutral_600",
64
+ button_primary_text_color_dark="white",
65
+ button_secondary_background_fill="*button_primary_background_fill",
66
+ button_secondary_background_fill_hover="*button_primary_background_fill_hover",
67
+ button_secondary_text_color="*button_primary_text_color",
68
+ button_cancel_background_fill="*button_primary_background_fill",
69
+ button_cancel_background_fill_hover="*button_primary_background_fill_hover",
70
+ button_cancel_text_color="*button_primary_text_color",
71
+ checkbox_label_background_fill="*button_primary_background_fill",
72
+ checkbox_label_background_fill_hover="*button_primary_background_fill_hover",
73
+ checkbox_label_text_color="*button_primary_text_color",
74
+ checkbox_background_color_selected="*neutral_600",
75
+ checkbox_background_color_dark="*neutral_700",
76
+ checkbox_background_color_selected_dark="*neutral_700",
77
+ checkbox_border_color_selected_dark="*neutral_800",
78
+ # Padding
79
+ checkbox_label_padding="*spacing_md",
80
+ button_large_padding="*spacing_lg",
81
+ button_small_padding="*spacing_sm",
82
+ # Borders
83
+ block_border_width="0px",
84
+ block_border_width_dark="1px",
85
+ shadow_drop_lg="0 1px 4px 0 rgb(0 0 0 / 0.1)",
86
+ block_shadow="*shadow_drop_lg",
87
+ block_shadow_dark="none",
88
+ # Block Labels
89
+ block_title_text_weight="600",
90
+ block_label_text_weight="600",
91
+ block_label_text_size="*text_md",
92
+ )