Spaces:

MansoorSarookh
/

CareerCounsellingApp

Sleeping

App Files Files Community

MansoorSarookh committed 8 days ago

Commit

93f8224

•

1 Parent(s): ea849eb

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -35

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from datasets import load_dataset
 import pandas as pd
 from transformers import pipeline
 import time
 # Constants
 universities_url = "https://www.4icu.org/top-universities-world/"
@@ -10,13 +11,37 @@ universities_url = "https://www.4icu.org/top-universities-world/"
 # Load datasets with caching to optimize performance
 @st.cache_resource
 def load_datasets():
-    ds_jobs = load_dataset("lukebarousse/data_jobs")
-    ds_courses = load_dataset("azrai99/coursera-course-dataset")
-    ds_custom_courses = pd.read_csv("final_cleaned_merged_coursera_courses.csv")
-    ds_custom_jobs = pd.read_csv("merged_data_science_jobs.csv")
-    ds_custom_universities = pd.read_csv("merged_university_data_cleaned (1).csv")
-    return ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs, ds_custom_universities
 ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs, ds_custom_universities = load_datasets()
 # Initialize the pipeline with caching
@@ -99,19 +124,21 @@ if "profile_data" in st.session_state:
             time.sleep(2)  # Simulate processing time
             job_recommendations = []
-            # Find jobs from ds_jobs
-            for job in ds_jobs["train"]:
-                job_title = job.get("job_title_short", "Unknown Job Title")
-                job_skills = job.get("job_skills", "") or ""
-                if any(skill.lower() in job_skills.lower() for skill in st.session_state.profile_data["tech_skills"].split(",")):
-                    job_recommendations.append(job_title)
-            # Find jobs from ds_custom_jobs
-            for _, job in ds_custom_jobs.iterrows():
-                job_title = job.get("job_title", "Unknown Job Title")
-                job_skills = job.get("skills", "") or ""
-                if any(skill.lower() in job_skills.lower() for skill in st.session_state.profile_data["tech_skills"].split(",")):
-                    job_recommendations.append(job_title)
             # Remove duplicates
             job_recommendations = list(set(job_recommendations))
@@ -129,21 +156,23 @@ if "profile_data" in st.session_state:
             time.sleep(2)
             course_recommendations = []
-            # Find relevant courses in ds_courses
-            for course in ds_courses["train"]:
-                if any(interest.lower() in course.get("Course Name", "").lower() for interest in st.session_state.profile_data["interests"].split(",")):
-                    course_recommendations.append({
-                        "name": course.get("Course Name", "Unknown Course Title"),
-                        "url": course.get("Links", "#")
-                    })
-            # Find relevant courses in ds_custom_courses
-            for _, row in ds_custom_courses.iterrows():
-                if any(interest.lower() in row["Course Name"].lower() for interest in st.session_state.profile_data["interests"].split(",")):
-                    course_recommendations.append({
-                        "name": row["Course Name"],
-                        "url": row.get("Links", "#")
-                    })
             # Remove duplicates
             course_recommendations = list({(course["name"], course["url"]) for course in course_recommendations})

 import pandas as pd
 from transformers import pipeline
 import time
+import os
 # Constants
 universities_url = "https://www.4icu.org/top-universities-world/"
 # Load datasets with caching to optimize performance
 @st.cache_resource
 def load_datasets():
     """Load every dataset the app uses, substituting ``None`` on failure.

     Two datasets are fetched with ``load_dataset`` and three are read from
     CSV files looked up relative to the current working directory. A source
     that cannot be loaded is reported in the Streamlit UI and returned as
     ``None`` so that callers can skip it instead of the app crashing while
     it starts up.

     Returns:
         tuple: ``(ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs,
         ds_custom_universities)``. Each element is the loaded object, or
         ``None`` when that source was unavailable.

     NOTE(review): the function is wrapped in ``st.cache_resource``, so a
     tuple containing ``None`` placeholders from a transient failure is
     presumably reused until the cache is cleared -- confirm this is
     acceptable.
     """

     def _load_hub_dataset(repo_id):
         # Loading from the Hub can fail in many ways besides ValueError
         # (network errors, missing repository, authentication), so catch
         # broadly at this boundary and show the cause to the user.
         try:
             return load_dataset(repo_id)
         except Exception as exc:
             st.error(
                 f"Error loading '{repo_id}'. Please ensure the dataset exists "
                 f"and is accessible. ({type(exc).__name__}: {exc})"
             )
             return None

     def _load_csv(path):
         if not os.path.isfile(path):
             st.warning(f"File '{path}' not found. Please check if it is available in the app directory.")
             return None
         # The file exists but may still be empty, malformed or unreadable;
         # pandas' parser and empty-data errors derive from ValueError.
         try:
             return pd.read_csv(path)
         except (OSError, ValueError) as exc:
             st.warning(f"File '{path}' could not be read ({type(exc).__name__}: {exc}).")
             return None

     # Hub datasets first, then the local CSV files, in the original order.
     ds_jobs = _load_hub_dataset("lukebarousse/data_jobs")
     ds_courses = _load_hub_dataset("azrai99/coursera-course-dataset")
     ds_custom_courses = _load_csv("final_cleaned_merged_coursera_courses.csv")
     ds_custom_jobs = _load_csv("merged_data_science_jobs.csv")
     ds_custom_universities = _load_csv("merged_university_data_cleaned (1).csv")
     return ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs, ds_custom_universities
+# Load datasets and handle None cases if they don't load
 ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs, ds_custom_universities = load_datasets()
 # Initialize the pipeline with caching
             time.sleep(2)  # Simulate processing time
             job_recommendations = []
             # Normalise the user's skills once, outside the loops: trim
             # whitespace and drop empty tokens. An empty token is a substring
             # of every string and would make every job match.
             wanted_skills = [
                 skill.strip().lower()
                 for skill in st.session_state.profile_data["tech_skills"].split(",")
                 if skill.strip()
             ]
             # Find jobs from ds_jobs if available
             if ds_jobs:
                 for job in ds_jobs["train"]:
                     job_title = job.get("job_title_short", "Unknown Job Title")
                     job_skills = job.get("job_skills", "") or ""
                     # Anything that is not text cannot be searched; treat it
                     # as "no skills listed" rather than crashing on .lower().
                     skills_text = job_skills.lower() if isinstance(job_skills, str) else ""
                     if any(skill in skills_text for skill in wanted_skills):
                         job_recommendations.append(job_title)
             # Find jobs from ds_custom_jobs if available
             if ds_custom_jobs is not None:
                 for _, job in ds_custom_jobs.iterrows():
                     job_title = job.get("job_title", "Unknown Job Title")
                     job_skills = job.get("skills", "")
                     # Missing CSV cells arrive as float NaN, which is truthy,
                     # so an `or ""` fallback does not protect .lower() here.
                     skills_text = job_skills.lower() if isinstance(job_skills, str) else ""
                     if any(skill in skills_text for skill in wanted_skills):
                         job_recommendations.append(job_title)
             # Remove duplicates, keeping first-seen order so the list is
             # stable from one run to the next.
             job_recommendations = list(dict.fromkeys(job_recommendations))
             time.sleep(2)
             course_recommendations = []
             # Normalise the user's interests once, outside the loops: trim
             # whitespace and drop empty tokens. An empty token is a substring
             # of every string and would make every course match.
             wanted_interests = [
                 interest.strip().lower()
                 for interest in st.session_state.profile_data["interests"].split(",")
                 if interest.strip()
             ]
             # Find relevant courses in ds_courses if available
             if ds_courses:
                 for course in ds_courses["train"]:
                     course_name = course.get("Course Name", "")
                     # A missing or null name cannot match any interest.
                     if not isinstance(course_name, str):
                         continue
                     lowered_name = course_name.lower()
                     if any(interest in lowered_name for interest in wanted_interests):
                         course_recommendations.append({
                             "name": course_name,
                             "url": course.get("Links", "#"),
                         })
             # Find relevant courses in ds_custom_courses if available
             if ds_custom_courses is not None:
                 for _, row in ds_custom_courses.iterrows():
                     course_name = row["Course Name"]
                     # Missing CSV cells arrive as float NaN, which has no
                     # .lower(); skip such rows instead of crashing.
                     if not isinstance(course_name, str):
                         continue
                     lowered_name = course_name.lower()
                     if any(interest in lowered_name for interest in wanted_interests):
                         course_recommendations.append({
                             "name": course_name,
                             "url": row.get("Links", "#"),
                         })
             # Remove duplicates (result is a list of (name, url) tuples),
             # keeping first-seen order so the list is stable between runs.
             course_recommendations = list(dict.fromkeys(
                 (course["name"], course["url"]) for course in course_recommendations
             ))