Spaces:
Sleeping
Sleeping
MansoorSarookh
committed on
Commit
•
93f8224
1
Parent(s):
ea849eb
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ from datasets import load_dataset
|
|
3 |
import pandas as pd
|
4 |
from transformers import pipeline
|
5 |
import time
|
|
|
6 |
|
7 |
# Constants
|
8 |
universities_url = "https://www.4icu.org/top-universities-world/"
|
@@ -10,13 +11,37 @@ universities_url = "https://www.4icu.org/top-universities-world/"
|
|
10 |
# Load datasets with caching to optimize performance
|
11 |
@st.cache_resource
|
12 |
def load_datasets():
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs, ds_custom_universities = load_datasets()
|
21 |
|
22 |
# Initialize the pipeline with caching
|
@@ -99,19 +124,21 @@ if "profile_data" in st.session_state:
|
|
99 |
time.sleep(2) # Simulate processing time
|
100 |
job_recommendations = []
|
101 |
|
102 |
-
# Find jobs from ds_jobs
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
115 |
|
116 |
# Remove duplicates
|
117 |
job_recommendations = list(set(job_recommendations))
|
@@ -129,21 +156,23 @@ if "profile_data" in st.session_state:
|
|
129 |
time.sleep(2)
|
130 |
course_recommendations = []
|
131 |
|
132 |
-
# Find relevant courses in ds_courses
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
|
|
|
|
147 |
|
148 |
# Remove duplicates
|
149 |
course_recommendations = list({(course["name"], course["url"]) for course in course_recommendations})
|
|
|
3 |
import pandas as pd
|
4 |
from transformers import pipeline
|
5 |
import time
|
6 |
+
import os
|
7 |
|
8 |
# Constants
|
9 |
universities_url = "https://www.4icu.org/top-universities-world/"
|
|
|
11 |
# Load datasets with caching to optimize performance
|
12 |
@st.cache_resource
def load_datasets():
    """Load the Hugging Face Hub datasets and the local CSV files used by the app.

    Each source is loaded independently, so one failure does not prevent the
    others from loading. A failed source is reported in the Streamlit UI
    (``st.error`` for Hub datasets, ``st.warning`` for missing CSV files) and
    its slot in the result is ``None``; callers must check for ``None``.

    Returns:
        A 5-tuple ``(ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs,
        ds_custom_universities)``. The first two are whatever ``load_dataset``
        returns; the last three are ``pandas.DataFrame`` objects read from CSV.
    """
    # Load datasets from Hugging Face
    hub_repo_ids = ("lukebarousse/data_jobs", "azrai99/coursera-course-dataset")
    hub_datasets = []
    for repo_id in hub_repo_ids:
        try:
            hub_datasets.append(load_dataset(repo_id))
        except (ValueError, OSError):
            # ValueError alone is too narrow: a missing dataset or a network
            # failure is expected to surface as FileNotFoundError /
            # ConnectionError (both OSError subclasses) and would otherwise
            # crash the app instead of degrading to None.
            st.error(f"Error loading '{repo_id}'. Please ensure the dataset exists and is accessible.")
            hub_datasets.append(None)
    ds_jobs, ds_courses = hub_datasets

    # Load local CSV files
    csv_files = {
        "ds_custom_courses": "final_cleaned_merged_coursera_courses.csv",
        "ds_custom_jobs": "merged_data_science_jobs.csv",
        "ds_custom_universities": "merged_university_data_cleaned (1).csv",
    }
    datasets = {}

    for name, path in csv_files.items():
        if os.path.isfile(path):
            datasets[name] = pd.read_csv(path)
        else:
            st.warning(f"File '{path}' not found. Please check if it is available in the app directory.")
            datasets[name] = None

    return (
        ds_jobs,
        ds_courses,
        datasets.get("ds_custom_courses"),
        datasets.get("ds_custom_jobs"),
        datasets.get("ds_custom_universities"),
    )
|
43 |
+
|
44 |
+
# Load datasets and handle None cases if they don't load
|
45 |
ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs, ds_custom_universities = load_datasets()
|
46 |
|
47 |
# Initialize the pipeline with caching
|
|
|
124 |
time.sleep(2) # Simulate processing time
|
125 |
job_recommendations = []
|
126 |
|
127 |
+
# Find jobs from ds_jobs if available
|
128 |
+
if ds_jobs:
|
129 |
+
for job in ds_jobs["train"]:
|
130 |
+
job_title = job.get("job_title_short", "Unknown Job Title")
|
131 |
+
job_skills = job.get("job_skills", "") or ""
|
132 |
+
if any(skill.lower() in job_skills.lower() for skill in st.session_state.profile_data["tech_skills"].split(",")):
|
133 |
+
job_recommendations.append(job_title)
|
134 |
+
|
135 |
+
# Find jobs from ds_custom_jobs if available
|
136 |
+
if ds_custom_jobs is not None:
|
137 |
+
for _, job in ds_custom_jobs.iterrows():
|
138 |
+
job_title = job.get("job_title", "Unknown Job Title")
|
139 |
+
job_skills = job.get("skills", "") or ""
|
140 |
+
if any(skill.lower() in job_skills.lower() for skill in st.session_state.profile_data["tech_skills"].split(",")):
|
141 |
+
job_recommendations.append(job_title)
|
142 |
|
143 |
# Remove duplicates
|
144 |
job_recommendations = list(set(job_recommendations))
|
|
|
156 |
time.sleep(2)
|
157 |
course_recommendations = []
|
158 |
|
159 |
+
# Find relevant courses in ds_courses if available
|
160 |
+
if ds_courses:
|
161 |
+
for course in ds_courses["train"]:
|
162 |
+
if any(interest.lower() in course.get("Course Name", "").lower() for interest in st.session_state.profile_data["interests"].split(",")):
|
163 |
+
course_recommendations.append({
|
164 |
+
"name": course.get("Course Name", "Unknown Course Title"),
|
165 |
+
"url": course.get("Links", "#")
|
166 |
+
})
|
167 |
+
|
168 |
+
# Find relevant courses in ds_custom_courses if available
|
169 |
+
if ds_custom_courses is not None:
|
170 |
+
for _, row in ds_custom_courses.iterrows():
|
171 |
+
if any(interest.lower() in row["Course Name"].lower() for interest in st.session_state.profile_data["interests"].split(",")):
|
172 |
+
course_recommendations.append({
|
173 |
+
"name": row["Course Name"],
|
174 |
+
"url": row.get("Links", "#")
|
175 |
+
})
|
176 |
|
177 |
# Remove duplicates
|
178 |
course_recommendations = list({(course["name"], course["url"]) for course in course_recommendations})
|