Dhruv committed on
Commit
c44d252
1 Parent(s): 1de6db8
.env ADDED
@@ -0,0 +1,3 @@
+ OPENAI_API_KEY='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'
+ SOURCEGRAPH='sgp_62048514ef3b8d1fd1f6233bd43b37fdb682d1d2'
+ CELERY_BROKER_URL='amqps://lslvixbt:[email protected]/lslvixbt'
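
A minimal sketch of how these settings are consumed, assuming `python-dotenv` is used as in `app.py`. Note that `app.py` also reads a `GITHUB_TOKEN` key, which this `.env` does not define, so that lookup is an assumption here:

```python
# Hedged sketch: load the committed .env the same way app.py does.
# GITHUB_TOKEN is an assumption -- app.py expects it, but this .env does not set it.
from dotenv import dotenv_values

config = dotenv_values(".env")            # returns a plain dict; does not touch os.environ
openai_key = config["OPENAI_API_KEY"]     # used by LLM_middleware.py
sourcegraph_key = config["SOURCEGRAPH"]
broker_url = config["CELERY_BROKER_URL"]
github_token = config.get("GITHUB_TOKEN")  # expected by app.py; add it to .env before running
```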
.gitignore ADDED
@@ -0,0 +1 @@
+ bioml/
LLM_middleware.py ADDED
@@ -0,0 +1,14 @@
+ import os
+ import re
+
+ from typing import List, Optional
+
+ import openai
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+
+ class LLM_Middleware():
+     hf_key: str
+     def __init__(self, openai_key, hf) -> None:
+         openai.api_key = openai_key  # the openai module exposes `api_key`, not `key`
+         self.hf_key = hf
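
A quick usage sketch of the class above, hedged: in this commit the middleware only stores the keys, and `HUGGINGFACE_TOKEN` is a hypothetical key name that the committed `.env` does not define:

```python
# Hedged example: construct the middleware with keys loaded from .env.
from dotenv import dotenv_values
from LLM_middleware import LLM_Middleware

config = dotenv_values(".env")
middleware = LLM_Middleware(
    openai_key=config["OPENAI_API_KEY"],
    hf=config.get("HUGGINGFACE_TOKEN"),  # hypothetical; not present in the committed .env
)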
README.md CHANGED
@@ -8,6 +8,10 @@ sdk_version: 3.32.0
  app_file: app.py
  pinned: false
  license: bigscience-openrail-m
+ full_width: true
+ tags: ["python", "transformers", "devops", "bioinformatics"]
+
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ## BioML hackathon submission: building a search engine for resurrecting archived / old packages 🪦
+
+ This repo integrates the currently prominent packages, deployed as transformers, to develop builds for the software and resolve any issues in the current package.
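
To make the intended flow concrete, here is a minimal, hedged sketch of how the files in this commit fit together. It assumes a `GITHUB_TOKEN` entry has been added to `.env` (the committed file lacks one); the method and constant names come from `gh_api_info.py` and `default_params.py` as committed here:

```python
# Hedged end-to-end sketch: fork an old repo, add devcontainer files, and open a Codespace.
from dotenv import dotenv_values

from default_params import (
    DEFAULT_DEVCONTAINER_JSON,
    DEFAULT_DOCKERFILE,
    DEFAULT_REPO_URL,
    DEFAULT_SAMPLE_SCRIPT,
    DEFAULT_USERNAME,
)
from gh_api_info import GithubRepoAccess

config = dotenv_values(".env")  # assumes GITHUB_TOKEN has been added to .env
gh = GithubRepoAccess(config["GITHUB_TOKEN"])

codespace_id = gh.create_codespace_with_files(
    username=DEFAULT_USERNAME,
    access_token=config["GITHUB_TOKEN"],
    repo_url=DEFAULT_REPO_URL,
    docker_file=DEFAULT_DOCKERFILE,
    devcontainer_json=DEFAULT_DEVCONTAINER_JSON,
    sample_script=DEFAULT_SAMPLE_SCRIPT,
)
print("Created codespace:", codespace_id)
```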
__pycache__/app.cpython-311.pyc ADDED
Binary file (1.7 kB)
__pycache__/default_params.cpython-311.pyc ADDED
Binary file (2.33 kB)
__pycache__/gh_api_info.cpython-311.pyc ADDED
Binary file (13.1 kB)
app.py ADDED
@@ -0,0 +1,32 @@
+ import gradio as gr
+ from gradio import TabbedInterface
+ from gh_api_info import GithubRepoAccess
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import requests
+ from dotenv import dotenv_values
+
+ def getRepoInformation(input_queries):
+     config = dotenv_values(".env")
+
+     obj = GithubRepoAccess(config["GITHUB_TOKEN"])
+     # GithubRepoAccess defines no getRepo method; look the repo up through PyGithub instead.
+     repo = obj.gh.get_repo(input_queries)
+
+     return repo
+
+
+ def ressurectingFrontend():
+     search = gr.Interface(
+         fn=getRepoInformation,  # was `searchGithub`, which is not defined anywhere
+         inputs=gr.Textbox(lines=1, label="Search"),
+         outputs=gr.Textbox(label="Output"),
+         title="Search Github",
+         description="build and resurrect old GitHub repos",
+         article="follow the link to the GitHub repo and our submission in BioML",
+     )
+     return search
+
+
+ if __name__ == "__main__":
+     ressurectingFrontend().launch()
+
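
A hedged smoke test of the handler above, outside of Gradio. It assumes `GITHUB_TOKEN` is present in `.env`; the repo name mirrors `DEFAULT_REPO_URL` from `default_params.py`:

```python
# Hedged example: call the handler directly and inspect the PyGithub Repository it returns.
from app import getRepoInformation

repo = getRepoInformation("kkroening/ffmpeg-python")
print(repo.full_name, repo.archived, repo.clone_url)
```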
default_params.py ADDED
@@ -0,0 +1,68 @@
+ #!/usr/bin/env python
+ import os
+
+ DEFAULT_ACCESS_TOKEN = os.environ.get("GH_ACCESS_TOKEN")
+ DEFAULT_USERNAME = "matthew-mcateer"
+ DEFAULT_REPO_URL = "https://github.com/kkroening/ffmpeg-python"
+ DEFAULT_DOCKERFILE = """# Use the official Python base image
+ FROM python:3.9-slim
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Install FFmpeg
+ RUN apt-get update && \
+     apt-get install -y --no-install-recommends ffmpeg && \
+     rm -rf /var/lib/apt/lists/*
+
+ # Clone the ffmpeg-python repository
+ RUN apt-get update && \
+     apt-get install -y --no-install-recommends git && \
+     git clone https://github.com/kkroening/ffmpeg-python.git /app/ffmpeg-python && \
+     rm -rf /var/lib/apt/lists/*
+
+ # Install the required dependencies
+ RUN pip install --no-cache-dir -r /app/ffmpeg-python/requirements.txt
+
+ # Optional: Set the entrypoint for the container
+ ENTRYPOINT ["python"]
+ """
+ DEFAULT_DEVCONTAINER_JSON = """{
+     "name": "ffmpeg-python-dev-container",
+     "dockerFile": "Dockerfile",
+     "settings": {
+         "terminal.integrated.shell.linux": "/bin/bash"
+     },
+     "extensions": [
+         "ms-python.python"
+     ],
+     "forwardPorts": [],
+     "postCreateCommand": "echo 'Welcome to your ffmpeg-python dev container!'"
+ }
+ """
+
+ DEFAULT_SAMPLE_SCRIPT = """
+ import requests
+ import ffmpeg
+ import tempfile
+
+ # Download a video file from the internet
+ video_url = 'https://download.samplelib.com/mp4/sample-5s.mp4'
+
+ response = requests.get(video_url, stream=True)
+ response.raise_for_status()
+
+ # Save the video to a temporary file
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video_file:
+     for chunk in response.iter_content(chunk_size=8192):
+         temp_video_file.write(chunk)
+     temp_video_file.flush()
+
+ # Process the video using ffmpeg-python
+ input_video = ffmpeg.input(temp_video_file.name)
+ output_video = input_video.filter('scale', 320, 240).output('output_video.mp4')
+ output_video.run()
+
+ print("Video processing completed. The output video is saved as output_video.mp4.")
+ """
+
gh_api_info.py ADDED
@@ -0,0 +1,361 @@
+ #!/usr/bin/env python
+ import base64
+ import random
+ import requests
+
+ from requests import Response
+ from typing import Any, Dict, Optional
+
+ from github import Github
+
+ from default_params import (
+     DEFAULT_ACCESS_TOKEN,
+     DEFAULT_DEVCONTAINER_JSON,
+     DEFAULT_DOCKERFILE,
+     DEFAULT_REPO_URL,
+     DEFAULT_SAMPLE_SCRIPT,
+     DEFAULT_USERNAME,
+ )
+
+
+ class GithubRepoAccess:
+     access_token: str
+     gh: Github
+     headers: dict[str, str]
+
+     def __init__(self, access_token) -> None:
+         self.access_token = access_token
+         self.gh = Github(self.access_token)
+         # GitHub's REST API expects the PAT as "token <PAT>" in the Authorization header.
+         self.headers = {"Authorization": f"token {self.access_token}"}
+
+     ## Download a tarball of the repository via the GitHub API.
+     def clone_repository(self, repo_name: str) -> Any:
+         repo_details = self.gh.get_repo(repo_name)
+         repo_owner = repo_details.owner.login
+
+         ## Use the repository's default branch for the tarball request.
+         branch = repo_details.default_branch
+         clone_repository_url = (
+             f"https://api.github.com/repos/{repo_owner}/{repo_details.name}/tarball/{branch}"
+         )
+
+         ## Fetch the tarball of the repository.
+         clone_repository_response = requests.get(clone_repository_url, headers=self.headers)
+         return clone_repository_response
+
+     def repo_exists(self, repo_name: str) -> bool:
+         ## Check whether the authenticated user already has a repository with this name.
+         for repo in self.gh.get_user().get_repos():
+             if repo.name == repo_name:
+                 print("Repo exists!!")
+                 return True
+         return False
+
+
+     def fork_repository(
+         self, username: str, repo_owner: str, repo_name: str, headers: dict[str, str]
+     ) -> Optional[Dict[str, Any]]:
+         fork_api_url = (
+             f"https://api.github.com/repos/{repo_owner}/{repo_name}/forks"
+         )
+         fork_response = requests.post(fork_api_url, headers=self.headers)
+
+         if fork_response.status_code == 202:
+             print("Repository forked successfully.")
+             return fork_response.json()
+         else:
+             print("Error forking the repository.")
+             print("Status code:", fork_response.status_code)
+             print("Error message:", fork_response.json())
+             return None
+
+
+     def create_new_branch(
+         self,
+         username: str,
+         repo_name: str,
+         new_branch_name: str,
+         headers: dict[str, str],
+     ) -> None:
+         api_base_url: str = f"https://api.github.com/repos/{username}/{repo_name}"
+         branches_api_url: str = f"{api_base_url}/git/refs/heads"
+
+         branches_response: Response = requests.get(
+             branches_api_url, headers=self.headers
+         )
+         branches = branches_response.json()
+
+         main_branch_sha = None
+         for branch in branches:
+             if branch["ref"] == "refs/heads/main":
+                 main_branch_sha = branch["object"]["sha"]
+                 break
+
+         if not main_branch_sha:
+             print("Error: Couldn't find the main branch.")
+             return
+
+         new_branch_data: dict[str, Any] = {
+             "ref": f"refs/heads/{new_branch_name}",
+             "sha": main_branch_sha,
+         }
+
+         new_branch_response: Response = requests.post(
+             branches_api_url, headers=self.headers, json=new_branch_data
+         )
+
+         if new_branch_response.status_code == 201:
+             print(f"New branch '{new_branch_name}' created successfully.")
+         else:
+             print("Error creating the new branch.")
+             print("Status code:", new_branch_response.status_code)
+             print("Error message:", new_branch_response.json())
+
+
+     def commit_files_to_branch(
+         self,
+         repo_owner: str,
+         repo_name: str,
+         new_branch_name: str,
+         devcontainer_json_content: str,
+         dockerfile_content: str,
+         sample_script_content: str,
+         headers: dict[str, str],
+     ) -> None:
+         api_base_url: str = (
+             f"https://api.github.com/repos/{repo_owner}/{repo_name}"
+         )
+
+         # Get default branch and its commit SHA
+         repo_info = requests.get(api_base_url, headers=self.headers).json()
+         print(repo_info)
+         default_branch = repo_info["default_branch"]
+         default_branch_sha = requests.get(
+             f"{api_base_url}/git/ref/heads/{default_branch}", headers=self.headers
+         ).json()["object"]["sha"]
+
+         devcontainer_json_blob_sha = requests.post(
+             f"{api_base_url}/git/blobs",
+             headers=self.headers,
+             json={
+                 "content": base64.b64encode(
+                     devcontainer_json_content.encode()
+                 ).decode(),
+                 "encoding": "base64",
+             },
+         ).json()["sha"]
+
+         dockerfile_blob_sha = requests.post(
+             f"{api_base_url}/git/blobs",
+             headers=self.headers,
+             json={
+                 "content": base64.b64encode(dockerfile_content.encode()).decode(),
+                 "encoding": "base64",
+             },
+         ).json()["sha"]
+
+         sample_script_blob_sha = requests.post(
+             f"{api_base_url}/git/blobs",
+             headers=self.headers,
+             json={
+                 "content": base64.b64encode(
+                     sample_script_content.encode()
+                 ).decode(),
+                 "encoding": "base64",
+             },
+         ).json()["sha"]
+
+         # Get latest commit tree
+         latest_commit_tree_sha = requests.get(
+             f"{api_base_url}/git/commits/{default_branch_sha}", headers=self.headers
+         ).json()["tree"]["sha"]
+         print("Latest commit tree SHA:", latest_commit_tree_sha)
+
+         # Create a new tree with the new blobs
+         new_tree_response = requests.post(
+             f"{api_base_url}/git/trees",
+             headers=self.headers,
+             json={
+                 "base_tree": latest_commit_tree_sha,
+                 "tree": [
+                     {
+                         "path": ".devcontainer/devcontainer.json",
+                         "mode": "100644",
+                         "type": "blob",
+                         "sha": devcontainer_json_blob_sha,
+                     },
+                     {
+                         "path": ".devcontainer/Dockerfile",
+                         "mode": "100644",
+                         "type": "blob",
+                         "sha": dockerfile_blob_sha,
+                     },
+                     {
+                         "path": "sample_script.py",
+                         "mode": "100644",
+                         "type": "blob",
+                         "sha": sample_script_blob_sha,
+                     },
+                 ],
+             },
+         )
+
+         if new_tree_response.status_code == 201:
+             new_tree = new_tree_response.json()
+             print("New tree created successfully.")
+             print("New tree SHA:", new_tree["sha"])
+         else:
+             print("Error creating the new tree.")
+             print("Status code:", new_tree_response.status_code)
+             print("Error message:", new_tree_response.json())
+             exit(1)
+
+         # Create a new commit with the new tree
+         new_commit_response = requests.post(
+             f"{api_base_url}/git/commits",
+             headers=self.headers,
+             json={
+                 "message": "Add devcontainer.json and Dockerfile",
+                 "tree": new_tree["sha"],
+                 "parents": [default_branch_sha],
+             },
+         )
+
+         if new_commit_response.status_code == 201:
+             new_commit = new_commit_response.json()
+             print("New commit created successfully.")
+             print("New commit SHA:", new_commit["sha"])
+         else:
+             print("Error creating the new commit.")
+             print("Status code:", new_commit_response.status_code)
+             print("Error message:", new_commit_response.json())
+             exit(1)
+
+         # Create new branch on the forked repository with the new commit SHA
+         new_branch_ref = f"refs/heads/{new_branch_name}"
+         create_branch_response = requests.post(
+             f"{api_base_url}/git/refs",
+             headers=self.headers,
+             json={"ref": new_branch_ref, "sha": new_commit["sha"]},
+         )
+
+         if create_branch_response.status_code == 201:
+             print(
+                 f"New branch '{new_branch_name}' created successfully on the forked repository with devcontainer.json and Dockerfile."
+             )
+         else:
+             print("Error creating the new branch on the forked repository.")
+             print("Status code:", create_branch_response.status_code)
+             print("Error message:", create_branch_response.json())
+             exit(1)
+
+
+     def create_codespace(
+         self,
+         repo_owner: str,
+         repo_name: str,
+         new_branch_name: str,
+         headers: dict[str, str],
+     ) -> str:
+         api_base_url: str = (
+             f"https://api.github.com/repos/{repo_owner}/{repo_name}/codespaces"
+         )
+
+         create_codespace_payload = {"ref": new_branch_name}
+
+         create_codespace_response = requests.post(
+             api_base_url, headers=self.headers, json=create_codespace_payload
+         )
+
+         if create_codespace_response.status_code == 201:
+             print(
+                 "Codespace creation request is successful. Waiting for the Codespace to be created..."
+             )
+             codespace = create_codespace_response.json()
+
+             # Poll the Codespace's status until it becomes 'available'
+             codespace_id = codespace["id"]
+             print(codespace_id)
+             print(codespace)
+
+             codespace_status = codespace["state"]
+             print(codespace_status)
+             return codespace_id
+             # while codespace_status != 'Available':
+             #     time.sleep(10)
+             #     codespace_response = requests.get(f'{api_base_url}/{codespace_id}', headers=self.headers)
+             #     codespace = codespace_response.json()
+             #     import ipdb
+             #     ipdb.set_trace()
+             #     codespace_status = codespace['state']
+             #     print(f"Current Codespace status: {codespace_status}")
+             #
+             # print(f"Codespace is available! ID: {codespace_id}")
+
+         else:
+             print("Error creating the Codespace.")
+             print("Status code:", create_codespace_response.status_code)
+             print("Error message:", create_codespace_response.json())
+
+
+     def create_codespace_with_files(
+         self,
+         username: str,
+         access_token: str,
+         repo_url: str,
+         docker_file: str,
+         devcontainer_json: str,
+         sample_script: str,
+     ) -> str:
+         # Extract repository owner and name from the repo URL
+         repo_parts = repo_url.split("/")
+         repo_owner = repo_parts[-2]
+         repo_name = repo_parts[-1].replace(".git", "")
+
+         # Configure headers for the GitHub API
+         headers = {
+             "Authorization": f"token {access_token}",
+             "Accept": "application/vnd.github+json",
+             "Content-Type": "application/json",
+         }
+
+         if not self.repo_exists(repo_name):
+             # Fork the repository
+             forked_repo = self.fork_repository(username, repo_owner, repo_name, headers)
+             print("Forked!")
+
+         # Create a new branch in the forked repository
+         new_branch_name = "devcontainer-setup-" + str(random.randint(1, 1000))
+         # self.create_new_branch(username, repo_name, new_branch_name, headers)
+
+         # Commit devcontainer.json, Dockerfile, and sample_script to the new branch
+         self.commit_files_to_branch(
+             username,
+             repo_name,
+             new_branch_name,
+             devcontainer_json,
+             docker_file,
+             sample_script,
+             headers,
+         )
+         print("Branch created and committed files")
+
+         # Create a new Codespace using the new branch
+         codespace_id = self.create_codespace(
+             username, repo_name, new_branch_name, headers
+         )
+
+         return codespace_id
+
+
+ if __name__ == "__main__":
+     gh_access = GithubRepoAccess(DEFAULT_ACCESS_TOKEN)
+     gh_access.create_codespace_with_files(
+         username=DEFAULT_USERNAME,
+         access_token=DEFAULT_ACCESS_TOKEN,
+         repo_url=DEFAULT_REPO_URL,
+         docker_file=DEFAULT_DOCKERFILE,
+         devcontainer_json=DEFAULT_DEVCONTAINER_JSON,
+         sample_script=DEFAULT_SAMPLE_SCRIPT,
+     )
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ PyGithub
+ transformers
+ gradio
+ python-dotenv
+ tensorflow
+ # imported directly by LLM_middleware.py, app.py, and gh_api_info.py
+ openai
+ requests