Slach, ysharma (HF staff) committed on
Commit 2164147
0 Parent(s):

Duplicate from ysharma/LangchainBot-space-creator


Co-authored-by: yuvraj sharma <[email protected]>

Files changed (6)
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +173 -0
  4. requirements.txt +5 -0
  5. template/app_og.py +80 -0
  6. template/requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: LangchainBot space creator
+ emoji: 🌌🔨
+ colorFrom: red
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 3.10.1
+ app_file: app.py
+ pinned: false
+ license: mit
+ duplicated_from: ysharma/LangchainBot-space-creator
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,173 @@
+ from langchain.llms import OpenAI
+ from langchain.chains.qa_with_sources import load_qa_with_sources_chain
+ from langchain.docstore.document import Document
+ import requests
+ import pathlib
+ import subprocess
+ import tempfile
+ import os
+ import gradio as gr
+ import pickle
+ from huggingface_hub import HfApi, upload_folder
+ from huggingface_hub import whoami, list_models
+
+ # using a vector space for our search
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.vectorstores.faiss import FAISS
+ from langchain.text_splitter import CharacterTextSplitter
+
+
+ # Code for extracting the markdown files from a repo
+ # To get markdown files from GitHub for any/your repo
+ def get_github_docs(repo_link):
+     repo_owner, repo_name = repo_link.split('/')[-2], repo_link.split('/')[-1]
+
+     with tempfile.TemporaryDirectory() as d:
+         subprocess.check_call(
+             f"git clone https://github.com/{repo_owner}/{repo_name}.git .",
+             cwd=d,
+             shell=True,
+         )
+         git_sha = (
+             subprocess.check_output("git rev-parse HEAD", shell=True, cwd=d)
+             .decode("utf-8")
+             .strip()
+         )
+         repo_path = pathlib.Path(d)
+         markdown_files = list(repo_path.rglob("*.md")) + list(
+             repo_path.rglob("*.mdx")
+         )
+         for markdown_file in markdown_files:
+             try:
+                 with open(markdown_file, "r") as f:
+                     relative_path = markdown_file.relative_to(repo_path)
+                     github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"
+                     yield Document(page_content=f.read(), metadata={"source": github_url})
+             except FileNotFoundError:
+                 print(f"Could not open file: {markdown_file}")
+
+ # Code for creating a new space for the user
+ def create_space(repo_link, hf_token):
+     repo_name = repo_link.split('/')[-1]
+     api = HfApi(token=hf_token)
+     repo_url = api.create_repo(
+         repo_id=f'LangChain_{repo_name}Bot',  # example - ysharma/LangChain_GradioBot
+         exist_ok=True,
+         repo_type="space",
+         space_sdk="gradio",
+         private=False)
+
+ # Code for creating the search index
+ # Saving search index to disk
+ def create_search_index(repo_link, openai_api_key):
+     sources = get_github_docs(repo_link)
+     source_chunks = []
+     splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)
+     for source in sources:
+         for chunk in splitter.split_text(source.page_content):
+             source_chunks.append(Document(page_content=chunk, metadata=source.metadata))
+
+     search_index = FAISS.from_documents(source_chunks, OpenAIEmbeddings(openai_api_key=openai_api_key))
+
+     # saving FAISS search index to disk
+     with open("search_index.pickle", "wb") as f:
+         pickle.dump(search_index, f)
+     return "search_index.pickle"
+
+ def upload_files_to_space(repo_link, hf_token):
+     repo_name = repo_link.split('/')[-1]
+     api = HfApi(token=hf_token)
+     user_name = whoami(token=hf_token)['name']
+
+     # Replacing the repo name in app.py
+     with open("template/app_og.py", "r") as f:
+         app = f.read()
+     app = app.replace("$RepoName", repo_name)
+
+     # Saving the new app.py file to disk
+     with open("template/app.py", "w") as f:
+         f.write(app)
+
+     # Uploading the new app.py to the new space
+     api.upload_file(
+         path_or_fileobj="template/app.py",
+         path_in_repo="app.py",
+         repo_id=f'{user_name}/LangChain_{repo_name}Bot',  # model_id
+         token=hf_token,
+         repo_type="space",)
+     # Uploading the new search_index file to the new space
+     api.upload_file(
+         path_or_fileobj="search_index.pickle",
+         path_in_repo="search_index.pickle",
+         repo_id=f'{user_name}/LangChain_{repo_name}Bot',  # model_id
+         token=hf_token,
+         repo_type="space",)
+     # Uploading requirements.txt to the space
+     api.upload_file(
+         path_or_fileobj="template/requirements.txt",
+         path_in_repo="requirements.txt",
+         repo_id=f'{user_name}/LangChain_{repo_name}Bot',  # model_id
+         token=hf_token,
+         repo_type="space",)
+     # Deleting the local files - search_index.pickle and app.py
+     os.remove("template/app.py")
+     os.remove("search_index.pickle")
+
+     repo_url = f"https://huggingface.co/spaces/{user_name}/LangChain_{repo_name}Bot"
+     space_name = f"{user_name}/LangChain_{repo_name}Bot"
+     return "<p style='color: orange; text-align: center; font-size: 24px; background-color: lightgray;'>🎉Congratulations🎉 Chatbot created successfully! Access it here : <a href=" + repo_url + " target='_blank'>" + space_name + "</a></p>"
+
+
+ def driver(repo_link, hf_token):
+     # create search index openai_api_key=openai_api_key
+     # search_index_pickle = create_search_index(repo_link, openai_api_key)
+     # create a new space
+     create_space(repo_link, hf_token)
+     # upload files to the new space
+     html_tag = upload_files_to_space(repo_link, hf_token)
+     print(f"html tag is : {html_tag}")
+     return html_tag
+
+ def set_state():
+     return gr.update(visible=True), gr.update(visible=True)
+
+ # Gradio code for Repo as input and search index as output file
+ with gr.Blocks() as demo:
+     gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
+               <div
+                 style="
+                   display: inline-flex;
+                   align-items: center;
+                   gap: 0.8rem;
+                   font-size: 1.75rem;
+                 "
+               >
+                 <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
+                   QandA Chatbot Creator for Github Repos - Automation done using LangChain, Gradio, and Spaces
+                 </h1>
+               </div>
+               <p style="margin-bottom: 10px; font-size: 94%">
+                 Generate a top-notch <b>Q&A Chatbot</b> for your Github Repo, using <a href="https://langchain.readthedocs.io/en/latest/" target="_blank">LangChain</a> and <a href="https://github.com/gradio-app/gradio" target="_blank">Gradio</a>.
+                 Paste your Github repository link, enter your OpenAI API key, and the app will create a FAISS embedding vector space for you.
+                 Next, input your Huggingface Token and press the final button.<br><br>
+                 Your new chatbot will be ready under your Huggingface profile, accessible via the displayed link.
+                 <center><a href="https://huggingface.co/spaces/ysharma/LangchainBot-space-creator?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></center>
+               </p>
+             </div>""")
+     with gr.Row():
+         with gr.Column():
+             repo_link = gr.Textbox(label="Enter Github repo name")
+             openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
+             btn_faiss = gr.Button("Create Search index")
+             search_index_file = gr.File(label='Search index vector')
+     with gr.Row():
+         hf_token_in = gr.Textbox(type='password', label="Enter hf-token name", visible=False)
+         btn_create_space = gr.Button("Create Your Chatbot", visible=False)
+         html_out = gr.HTML()
+
+     btn_faiss.click(create_search_index, [repo_link, openai_api_key], search_index_file)
+     btn_faiss.click(fn=set_state, inputs=[], outputs=[hf_token_in, btn_create_space])
+     btn_create_space.click(driver, [repo_link, hf_token_in], html_out)
+
+ demo.queue()
+ demo.launch(debug=True)
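
Side note (not part of this commit): a minimal local sanity check for the search_index.pickle that create_search_index() writes, mirroring how template/app_og.py consumes it. The question string and the "sk-..." key are placeholders, and it assumes the langchain==0.0.55 API pinned in requirements.txt.

import pickle
from langchain.llms import OpenAI
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

# Load the FAISS index pickled by create_search_index()
with open("search_index.pickle", "rb") as f:
    search_index = pickle.load(f)

# Ask one question against the indexed markdown chunks and get an answer with sources
chain = load_qa_with_sources_chain(OpenAI(temperature=0, openai_api_key="sk-..."))
result = chain(
    {
        "input_documents": search_index.similarity_search("How do I create a Blocks demo?", k=4),
        "question": "How do I create a Blocks demo?",
    },
    return_only_outputs=True,
)
print(result["output_text"])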
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ langchain==0.0.55
+ requests
+ openai
+ transformers
+ faiss-cpu
template/app_og.py ADDED
@@ -0,0 +1,80 @@
+ from langchain.llms import OpenAI
+ from langchain.chains.qa_with_sources import load_qa_with_sources_chain
+ from langchain.docstore.document import Document
+ import requests
+ import pathlib
+ import subprocess
+ import tempfile
+ import os
+ import gradio as gr
+ import pickle
+
+ # using a vector space for our search
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.vectorstores.faiss import FAISS
+ from langchain.text_splitter import CharacterTextSplitter
+
+ # loading FAISS search index from disk
+ with open("search_index.pickle", "rb") as f:
+     search_index = pickle.load(f)
+
+ # Get GPT3 response using LangChain
+ def print_answer(question, openai):  # openai_embeddings
+     # search_index = get_search_index()
+     chain = load_qa_with_sources_chain(openai)  # (OpenAI(temperature=0))
+     response = (
+         chain(
+             {
+                 "input_documents": search_index.similarity_search(question, k=4),
+                 "question": question,
+             },
+             return_only_outputs=True,
+         )["output_text"]
+     )
+     if len(response.split('\n')[-1].split()) > 2:
+         response = response.split('\n')[0] + ', '.join([' <a href="' + response.split('\n')[-1].split()[i] + '" target="_blank"><u>Click Link' + str(i) + '</u></a>' for i in range(1, len(response.split('\n')[-1].split()))])
+     else:
+         response = response.split('\n')[0] + ' <a href="' + response.split('\n')[-1].split()[-1] + '" target="_blank"><u>Click Link</u></a>'
+     return response
+
+
+ def chat(message, history, openai_api_key):
+     # openai_embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
+     openai = OpenAI(temperature=0, openai_api_key=openai_api_key)
+     # os.environ["OPENAI_API_KEY"] = openai_api_key
+     history = history or []
+     message = message.lower()
+     response = print_answer(message, openai)  # openai_embeddings
+     history.append((message, response))
+     return history, history
+
+
+ with gr.Blocks() as demo:
+     gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
+               <div
+                 style="
+                   display: inline-flex;
+                   align-items: center;
+                   gap: 0.8rem;
+                   font-size: 1.75rem;
+                 "
+               >
+                 <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
+                   $RepoName QandA - LangChain Bot
+                 </h1>
+               </div>
+               <p style="margin-bottom: 10px; font-size: 94%">
+                 Hi, I'm a Q and A $RepoName expert bot. Start by typing in your OpenAI API key, then the questions/issues you are facing in your $RepoName implementation, and press Enter.<br>
+                 <a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space with a GPU upgrade for fast inference and no queue<br>
+                 Built using <a href="https://langchain.readthedocs.io/en/latest/" target="_blank">LangChain</a> and <a href="https://github.com/gradio-app/gradio" target="_blank">Gradio</a> for the $RepoName repo
+               </p>
+             </div>""")
+     with gr.Row():
+         question = gr.Textbox(label='Type in your questions about $RepoName here and press Enter!', placeholder='What questions do you want to ask about the $RepoName library?')
+         openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
+     state = gr.State()
+     chatbot = gr.Chatbot()
+     question.submit(chat, [question, state, openai_api_key], [chatbot, state])
+
+ if __name__ == "__main__":
+     demo.launch()
template/requirements.txt ADDED
@@ -0,0 +1,6 @@
+ langchain==0.0.55
+ requests
+ openai
+ transformers
+ huggingface_hub
+ faiss-cpu
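
For completeness, a hedged sketch of duplicating the creator Space programmatically instead of through the "Duplicate Space" badge shown in the app header; it assumes a huggingface_hub release that ships duplicate_space (not pinned in this commit's requirements) and uses a placeholder hf_... token.

from huggingface_hub import duplicate_space

# Copy ysharma/LangchainBot-space-creator into your own namespace,
# the programmatic equivalent of the ?duplicate=true link in the app header
duplicate_space("ysharma/LangchainBot-space-creator", token="hf_...")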