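"""Career-counseling RAG chatbot: Gemini for generation, FAISS for retrieval,
and SentenceTransformers for embeddings.

Module layout:
- UserInfo / ChatConfig: dataclasses for user context and app settings
- UserManager: JSON-backed storage of user profiles
- ChatLogger / ChatMemory: interaction logging and a rolling history window
- QuestionGenerator: suggests follow-up questions after each answer
- GeminiRAG: builds context from retrieved docs and answers questions
- CustomEmbeddings / ProductDatabase: embedding model and FAISS vector store
"""
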
import os
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import torch
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import FAISS
from langchain_core.embeddings import Embeddings
import google.generativeai as genai
from datetime import datetime
import json

@dataclass
class UserInfo:
    """User information for context"""
    name: str
    college: str
    degree: str
    year: int
    career_goals: str
    has_internship: bool
    has_placement: bool

@dataclass
class ChatConfig:
    """Configuration for the chatbot"""
    embedding_model_name: str = 'all-MiniLM-L6-v2'
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    max_history: int = 3
    gemini_api_key: str = os.getenv("GEMINI_API_KEY", "")  # Read from the environment; never hard-code API keys
    log_file: str = "chat_history.txt"
    user_data_file: str = "user_data.json"

class UserManager:
    """Manages user information storage and retrieval"""
    def __init__(self, user_data_file: str):
        self.user_data_file = user_data_file
        self.ensure_file_exists()

    def ensure_file_exists(self):
        """Create the user data file (and its parent directory) if it doesn't exist"""
        if not os.path.exists(self.user_data_file):
            parent_dir = os.path.dirname(self.user_data_file)
            if parent_dir:  # os.makedirs('') raises, so only create a directory when one is given
                os.makedirs(parent_dir, exist_ok=True)
            with open(self.user_data_file, 'w', encoding='utf-8') as f:
                json.dump({}, f)

    def save_user_info(self, user_info: UserInfo) -> bool:
        """Save user information to the JSON file, keyed by user name"""
        try:
            # First ensure the file exists with valid JSON
            self.ensure_file_exists()
            # Read existing data, falling back to an empty dict if the file is corrupt
            try:
                with open(self.user_data_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except json.JSONDecodeError:
                data = {}
            # Update this user's record
            data[user_info.name] = {
                "college": user_info.college,
                "degree": user_info.degree,
                "year": user_info.year,
                "career_goals": user_info.career_goals,
                "has_internship": user_info.has_internship,
                "has_placement": user_info.has_placement,
                "last_updated": datetime.now().isoformat()
            }
            # Write back to file
            with open(self.user_data_file, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4)
            return True
        except Exception as e:
            logging.error(f"Error saving user info: {str(e)}")
            return False
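
    # The class docstring promises retrieval as well as storage, but the original
    # file only implements saving. The helper below is a minimal sketch of the
    # matching read path; the name `get_user_info` is an assumption, not part of
    # the original code.
    def get_user_info(self, name: str) -> Optional[Dict[str, Any]]:
        """Return the stored record for `name`, or None if absent or unreadable (hypothetical helper)"""
        try:
            with open(self.user_data_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return data.get(name)
        except (OSError, json.JSONDecodeError) as e:
            logging.error(f"Error reading user info: {str(e)}")
            return None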

class ChatLogger:
    """Logger for chat interactions"""
    def __init__(self, log_file: str):
        self.log_file = log_file

    def log_interaction(self, question: str, answer: str, user_info: Optional[UserInfo] = None):
        """Append a timestamped Q/A pair, with user context when available, to the log file"""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        with open(self.log_file, 'a', encoding='utf-8') as f:
            user_context = ""
            if user_info:
                user_context = (
                    f"\nUser: {user_info.name} | College: {user_info.college} | "
                    f"Degree: {user_info.degree} | Year: {user_info.year} | "
                    f"Career Goals: {user_info.career_goals}"
                )
            f.write(f"\n[{timestamp}]{user_context}\nQ: {question}\nA: {answer}\n{'-'*50}")

class ChatMemory:
    """Manages chat history"""
    def __init__(self, max_history: int = 3):
        self.max_history = max_history
        self.history = []

    def add_interaction(self, question: str, answer: str):
        self.history.append({"question": question, "answer": answer})
        if len(self.history) > self.max_history:
            self.history.pop(0)

    def get_history(self) -> List[Dict[str, str]]:
        return self.history

    def clear_history(self):
        self.history = []
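
# Note: ChatMemory keeps a rolling window, but GeminiRAG and QuestionGenerator
# below start each Gemini chat with history=[]; callers that want multi-turn
# context must feed the stored history into the prompts themselves.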

class QuestionGenerator:
    """Generates follow-up questions a student is likely to ask next"""
    def __init__(self, api_key: str):
        genai.configure(api_key=api_key)
        self.generation_config = {
            "temperature": 0.1,
            "top_p": 0.95,
            "max_output_tokens": 8192,
        }
        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-flash",
            generation_config=self.generation_config,
            safety_settings={
                'HATE': 'BLOCK_NONE',
                'HARASSMENT': 'BLOCK_NONE',
                'SEXUAL': 'BLOCK_NONE',
                'DANGEROUS': 'BLOCK_NONE',
            }
        )
        # Fallbacks used when generation fails or returns an unexpected shape
        self.default_questions = [
            "What are some other skills I should focus on to improve my chances?",
            "What resources or platforms can help me in my career journey?",
            "Are there any specific companies or organizations I should target for internships/placements?",
            "What are some common interview questions asked for this career path?"
        ]

    async def generate_questions(
        self,
        question: str,
        answer: str,
        user_info: Optional[UserInfo] = None
    ) -> List[str]:
        """Generate follow-up questions based on the conversation"""
        try:
            chat = self.model.start_chat(history=[])
            prompt = f"""Generate 4 simple, practical follow-up questions that a college student might ask, based on this conversation about career advice:

Question: {question}
Answer: {answer}

Focus the questions on:
1. Skills development (What skills are needed, how to improve)
2. Resources and platforms (Where to find internships, jobs, etc.)
3. Specific target companies/organizations
4. Common interview questions

Keep the language simple and student-friendly. Format each question on a new line.
NOTE: YOU MUST STRICTLY REPLY IN HINGLISH"""
            response = chat.send_message(prompt).text
            # Extract non-empty lines as candidate questions
            questions = [q.strip() for q in response.split('\n') if q.strip()]
            # Fall back to the defaults unless we got exactly 4 valid questions
            if len(questions) != 4:
                return self.default_questions
            return questions
        except Exception as e:
            logging.error(f"Error generating questions: {str(e)}")
            return self.default_questions

class GeminiRAG:
    """Retrieval-augmented answer generation backed by Gemini"""
    def __init__(self, api_key: str):
        genai.configure(api_key=api_key)
        self.generation_config = {
            "temperature": 0.1,
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": 8192,
        }
        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-flash",
            generation_config=self.generation_config,
            safety_settings={
                'HATE': 'BLOCK_NONE',
                'HARASSMENT': 'BLOCK_NONE',
                'SEXUAL': 'BLOCK_NONE',
                'DANGEROUS': 'BLOCK_NONE',
            }
        )

    def create_context(self, relevant_docs: List[Dict[str, Any]]) -> str:
        """Creates a context string from relevant documents"""
        context_parts = []
        for doc in relevant_docs:
            context_parts.append(f"Section: {doc['metadata']['section']}\n{doc['content']}")
        return "\n\n".join(context_parts)

    async def get_answer(
        self,
        question: str,
        context: str,
        user_info: Optional[UserInfo] = None
    ) -> str:
        """Answer a question from retrieved context, personalized with the user's background"""
        try:
            chat = self.model.start_chat(history=[])
            # Build the optional user-background block up front instead of nesting
            # f-strings inside the prompt, which keeps it readable
            user_background = ""
            if user_info:
                user_background = f"""User Background:
- Student at {user_info.college}
- Studying {user_info.degree} (Year {user_info.year})
- Goals: {user_info.career_goals}
- {'Has internship experience' if user_info.has_internship else 'No internship yet'}
- {'Has placement' if user_info.has_placement else 'Seeking placement'}"""
            # Simplified prompt to reduce the chance of verbatim recitation
            prompt = f"""As a career counselor, provide a helpful response based on:

Context: {context}

{user_background}

Question: {question}

Provide practical advice with specific examples and actionable steps."""
            try:
                response = chat.send_message(prompt)
                if response.text:
                    return response.text
                else:
                    return "I apologize, but I couldn't generate a proper response. Please try rephrasing your question."
            except Exception as chat_error:
                logging.error(f"Chat error: {str(chat_error)}")
                return "I encountered an error while processing your question. Please try again with a simpler question."
        except Exception as e:
            logging.error(f"Error generating answer: {str(e)}")
            return "An error occurred. Please try again later."

class CustomEmbeddings(Embeddings):
    """Custom embeddings using SentenceTransformer"""
    def __init__(self, model_name: str, device: str):
        self.model = SentenceTransformer(model_name)
        self.model.to(device)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        with torch.no_grad():
            embeddings = self.model.encode(texts, convert_to_tensor=True)
            return embeddings.cpu().numpy().tolist()

    def embed_query(self, text: str) -> List[float]:
        with torch.no_grad():
            embedding = self.model.encode([text], convert_to_tensor=True)
            return embedding.cpu().numpy().tolist()[0]

class ProductDatabase:
    """Handles document storage and retrieval"""
    def __init__(self, config: ChatConfig):
        self.embeddings = CustomEmbeddings(
            model_name=config.embedding_model_name,
            device=config.device
        )
        self.vectorstore = None

    def process_markdown(self, markdown_content: str):
        """Split markdown content into sections and build the vector store"""
        try:
            sections = markdown_content.split('\n## ')
            documents = []
            # Everything before the first '## ' heading is treated as the introduction
            if sections[0].startswith('# '):
                intro = sections[0].split('\n', 1)[1]
                documents.append({
                    "content": intro,
                    "section": "Introduction"
                })
            for section in sections[1:]:
                if section.strip():
                    # partition() tolerates headings with no body text, unlike a two-way unpack
                    title, _, content = section.partition('\n')
                    documents.append({
                        "content": content.strip(),
                        "section": title.strip()
                    })
            texts = [doc["content"] for doc in documents]
            metadatas = [{"section": doc["section"]} for doc in documents]
            self.vectorstore = FAISS.from_texts(
                texts=texts,
                embedding=self.embeddings,
                metadatas=metadatas
            )
        except Exception as e:
            raise Exception(f"Error processing markdown content: {str(e)}") from e

    def search(self, query: str, k: int = 3) -> List[Dict[str, Any]]:
        """Search for relevant documents"""
        if not self.vectorstore:
            raise ValueError("Database not initialized. Please process documents first.")
        try:
            docs = self.vectorstore.similarity_search(query, k=k)
            return [{"content": doc.page_content, "metadata": doc.metadata} for doc in docs]
        except Exception as e:
            logging.error(f"Error during search: {str(e)}")
            return []
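
# --- Usage sketch -----------------------------------------------------------
# A minimal wiring example, not part of the original file. It assumes
# GEMINI_API_KEY is set in the environment and that a markdown knowledge base
# exists at the hypothetical path "knowledge_base.md". asyncio.run is used
# because get_answer is a coroutine.
if __name__ == "__main__":
    import asyncio

    config = ChatConfig()
    database = ProductDatabase(config)
    with open("knowledge_base.md", "r", encoding="utf-8") as f:  # hypothetical file
        database.process_markdown(f.read())

    rag = GeminiRAG(config.gemini_api_key)
    logger = ChatLogger(config.log_file)

    question = "How do I prepare for software engineering placements?"
    relevant_docs = database.search(question)
    context = rag.create_context(relevant_docs)
    answer = asyncio.run(rag.get_answer(question, context))
    logger.log_interaction(question, answer)
    print(answer)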