import requests from bs4 import BeautifulSoup import pandas as pd import gradio as gr from groq import Groq # Step 1: Scrape free courses from Analytics Vidhya def fetch_free_courses(): url = "https://courses.analyticsvidhya.com/pages/all-free-courses" response = requests.get(url) soup = BeautifulSoup(response.content, 'html.parser') courses_data = [] # Extract course details for card in soup.select('header.course-card__img-container'): image_element = card.find('img', class_='course-card__img') if image_element: title = image_element.get('alt') img_url = image_element.get('src') link = card.find_previous('a') if link: course_link = link.get('href') if not course_link.startswith('http'): course_link = 'https://courses.analyticsvidhya.com' + course_link courses_data.append({ 'title': title, 'image_url': img_url, 'course_link': course_link }) return courses_data courses = fetch_free_courses() # Step 2: Load data into a DataFrame df = pd.DataFrame(courses) client = Groq() # Course search function using Groq def course_recommendation(query): try: print(f"Search query: {query}") print(f"Total available courses: {len(df)}") # Prompt construction for Groq prompt = f""" Based on the query: "{query}", Rank the courses below based on relevance (0 to 1), with 1 being highly relevant. Filter out courses with relevance scores below 0.5. Courses: {df['title'].to_string(index=False)} """ print("Sending query to Groq for recommendation...") # Sending the request to Groq for results response = client.chat.completions.create( model="mixtral-8x7b-32768", messages=[ {"role": "system", "content": "You are a course recommendation assistant."}, {"role": "user", "content": prompt} ], temperature=0.3, max_tokens=800 ) print("Response received from Groq.") # Parse the Groq response recommended_courses = [] content = response.choices[0].message.content print("Groq's response:\n", content) for line in content.split('\n'): if line.startswith('Title:'): course_title = line.split('Title:')[1].strip() elif line.startswith('Relevance:'): score = float(line.split('Relevance:')[1].strip()) if score >= 0.5: matching_course = df[df['title'] == course_title] if not matching_course.empty: course_data = matching_course.iloc[0] recommended_courses.append({ 'title': course_title, 'image_url': course_data['image_url'], 'course_link': course_data['course_link'], 'score': score }) return sorted(recommended_courses, key=lambda x: x['score'], reverse=True)[:10] except Exception as e: print(f"Error during course search: {e}") return [] # Gradio function to search and display courses def gradio_search_interface(query): results = course_recommendation(query) if results: html_output = '