MansoorSarookh commited on
Commit
93f8224
1 parent: ea849eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -35
app.py CHANGED
@@ -3,6 +3,7 @@ from datasets import load_dataset
3
  import pandas as pd
4
  from transformers import pipeline
5
  import time
 
6
 
7
  # Constants
8
  universities_url = "https://www.4icu.org/top-universities-world/"
@@ -10,13 +11,37 @@ universities_url = "https://www.4icu.org/top-universities-world/"
10
  # Load datasets with caching to optimize performance
11
  @st.cache_resource
12
  def load_datasets():
13
- ds_jobs = load_dataset("lukebarousse/data_jobs")
14
- ds_courses = load_dataset("azrai99/coursera-course-dataset")
15
- ds_custom_courses = pd.read_csv("final_cleaned_merged_coursera_courses.csv")
16
- ds_custom_jobs = pd.read_csv("merged_data_science_jobs.csv")
17
- ds_custom_universities = pd.read_csv("merged_university_data_cleaned (1).csv")
18
- return ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs, ds_custom_universities
19
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs, ds_custom_universities = load_datasets()
21
 
22
  # Initialize the pipeline with caching
@@ -99,19 +124,21 @@ if "profile_data" in st.session_state:
99
  time.sleep(2) # Simulate processing time
100
  job_recommendations = []
101
 
102
- # Find jobs from ds_jobs
103
- for job in ds_jobs["train"]:
104
- job_title = job.get("job_title_short", "Unknown Job Title")
105
- job_skills = job.get("job_skills", "") or ""
106
- if any(skill.lower() in job_skills.lower() for skill in st.session_state.profile_data["tech_skills"].split(",")):
107
- job_recommendations.append(job_title)
108
-
109
- # Find jobs from ds_custom_jobs
110
- for _, job in ds_custom_jobs.iterrows():
111
- job_title = job.get("job_title", "Unknown Job Title")
112
- job_skills = job.get("skills", "") or ""
113
- if any(skill.lower() in job_skills.lower() for skill in st.session_state.profile_data["tech_skills"].split(",")):
114
- job_recommendations.append(job_title)
 
 
115
 
116
  # Remove duplicates
117
  job_recommendations = list(set(job_recommendations))
@@ -129,21 +156,23 @@ if "profile_data" in st.session_state:
129
  time.sleep(2)
130
  course_recommendations = []
131
 
132
- # Find relevant courses in ds_courses
133
- for course in ds_courses["train"]:
134
- if any(interest.lower() in course.get("Course Name", "").lower() for interest in st.session_state.profile_data["interests"].split(",")):
135
- course_recommendations.append({
136
- "name": course.get("Course Name", "Unknown Course Title"),
137
- "url": course.get("Links", "#")
138
- })
139
-
140
- # Find relevant courses in ds_custom_courses
141
- for _, row in ds_custom_courses.iterrows():
142
- if any(interest.lower() in row["Course Name"].lower() for interest in st.session_state.profile_data["interests"].split(",")):
143
- course_recommendations.append({
144
- "name": row["Course Name"],
145
- "url": row.get("Links", "#")
146
- })
 
 
147
 
148
  # Remove duplicates
149
  course_recommendations = list({(course["name"], course["url"]) for course in course_recommendations})
 
3
  import pandas as pd
4
  from transformers import pipeline
5
  import time
6
+ import os
7
 
8
  # Constants
9
  universities_url = "https://www.4icu.org/top-universities-world/"
 
11
  # Load datasets with caching to optimize performance
12
@st.cache_resource
def load_datasets():
    """Load the remote Hugging Face datasets and the local CSV files.

    Returns
    -------
    tuple
        (ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs,
        ds_custom_universities). Any element is None when its source could
        not be loaded; a Streamlit error/warning is shown instead of
        crashing the app.
    """
    def _load_hf(dataset_name):
        # load_dataset raises more than ValueError in practice (network
        # failures, FileNotFoundError, dataset-not-found errors), so catch
        # broadly here: any failure should degrade to None, not kill the app.
        # The exception text is surfaced so the user sees the actual cause.
        try:
            return load_dataset(dataset_name)
        except Exception as exc:
            st.error(
                f"Error loading '{dataset_name}': {exc}. "
                "Please ensure the dataset exists and is accessible."
            )
            return None

    ds_jobs = _load_hf("lukebarousse/data_jobs")
    ds_courses = _load_hf("azrai99/coursera-course-dataset")

    # Local CSV files expected to ship alongside the app.
    csv_files = {
        "ds_custom_courses": "final_cleaned_merged_coursera_courses.csv",
        "ds_custom_jobs": "merged_data_science_jobs.csv",
        "ds_custom_universities": "merged_university_data_cleaned (1).csv",
    }
    frames = {}
    for name, path in csv_files.items():
        if os.path.isfile(path):
            frames[name] = pd.read_csv(path)
        else:
            st.warning(f"File '{path}' not found. Please check if it is available in the app directory.")
            frames[name] = None

    return (
        ds_jobs,
        ds_courses,
        frames.get("ds_custom_courses"),
        frames.get("ds_custom_jobs"),
        frames.get("ds_custom_universities"),
    )
44
+ # Load datasets and handle None cases if they don't load
45
  ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs, ds_custom_universities = load_datasets()
46
 
47
  # Initialize the pipeline with caching
 
124
  time.sleep(2) # Simulate processing time
125
  job_recommendations = []
126
 
127
# Normalise the user's skill list ONCE, outside the row loops:
# split(",") leaves surrounding whitespace ("python, sql" -> " sql"),
# which would silently fail the substring match below, so strip and
# lowercase each entry and drop empties ("a,,b").
user_skills = [
    s.strip().lower()
    for s in st.session_state.profile_data["tech_skills"].split(",")
    if s.strip()
]

# Find jobs from ds_jobs if available
if ds_jobs:
    for job in ds_jobs["train"]:
        job_title = job.get("job_title_short", "Unknown Job Title")
        raw_skills = job.get("job_skills", "")
        # Guard non-string values: None from the dataset, or NaN from
        # pandas — NaN is truthy, so `raw or ""` alone is not enough.
        job_skills = raw_skills.lower() if isinstance(raw_skills, str) else ""
        if any(skill in job_skills for skill in user_skills):
            job_recommendations.append(job_title)

# Find jobs from ds_custom_jobs if available
if ds_custom_jobs is not None:
    for _, job in ds_custom_jobs.iterrows():
        job_title = job.get("job_title", "Unknown Job Title")
        raw_skills = job.get("skills", "")
        job_skills = raw_skills.lower() if isinstance(raw_skills, str) else ""
        if any(skill in job_skills for skill in user_skills):
            job_recommendations.append(job_title)
142
 
143
  # Remove duplicates
144
  job_recommendations = list(set(job_recommendations))
 
156
  time.sleep(2)
157
  course_recommendations = []
158
 
159
+ # Find relevant courses in ds_courses if available
160
+ if ds_courses:
161
+ for course in ds_courses["train"]:
162
+ if any(interest.lower() in course.get("Course Name", "").lower() for interest in st.session_state.profile_data["interests"].split(",")):
163
+ course_recommendations.append({
164
+ "name": course.get("Course Name", "Unknown Course Title"),
165
+ "url": course.get("Links", "#")
166
+ })
167
+
168
+ # Find relevant courses in ds_custom_courses if available
169
+ if ds_custom_courses is not None:
170
+ for _, row in ds_custom_courses.iterrows():
171
+ if any(interest.lower() in row["Course Name"].lower() for interest in st.session_state.profile_data["interests"].split(",")):
172
+ course_recommendations.append({
173
+ "name": row["Course Name"],
174
+ "url": row.get("Links", "#")
175
+ })
176
 
177
  # Remove duplicates
178
  course_recommendations = list({(course["name"], course["url"]) for course in course_recommendations})