import json
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pandas as pd
import langchain
import os
import openai
import ast
from langchain import OpenAI
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import JSONLoader
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from typing import List, Dict, Any
import requests

# getting the json files
def get_clinical_record_info(clinical_record_id: str, timeout: float = 30.0) -> Dict[str, Any]:
    """Fetch one study from the ClinicalTrials.gov v2 API and return its parsed JSON.

    Args:
        clinical_record_id: Study identifier inserted into the URL path,
            e.g. ``"NCT00841061"``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` would block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Equivalent request:
    # curl -X GET "https://clinicaltrials.gov/api/v2/studies/NCT00841061" \
    # -H "accept: application/json"
    request_url = f"https://clinicaltrials.gov/api/v2/studies/{clinical_record_id}"
    response = requests.get(
        request_url,
        headers={"accept": "application/json"},
        timeout=timeout,
    )
    # Surface unknown ids / server errors here instead of handing an error
    # payload to the callers as if it were a study record.
    response.raise_for_status()
    return response.json()

def get_clinical_records_by_ids(clinical_record_ids: List[str]) -> List[Dict[str, Any]]:
    """Fetch the study record for each id, one request per id.

    Args:
        clinical_record_ids: Study identifiers to look up. Ids are not
            de-duplicated, so a repeated id is fetched again.

    Returns:
        The fetched records, in the same order as ``clinical_record_ids``.
    """
    return [
        get_clinical_record_info(record_id)
        for record_id in clinical_record_ids
    ]

# (output key, key path below 'protocolSection', factory for the fallback value)
_STUDY_FIELDS = (
    ("organization_name", ("identificationModule", "organization", "fullName"), str),
    ("project_title", ("identificationModule", "officialTitle"), str),
    ("status", ("statusModule", "overallStatus"), str),
    ("brief_description", ("descriptionModule", "briefSummary"), str),
    ("detailed_description", ("descriptionModule", "detailedDescription"), str),
    ("conditions", ("conditionsModule", "conditions"), list),
    ("keywords", ("conditionsModule", "keywords"), list),
    ("interventions", ("armsInterventionsModule", "interventions"), list),
    ("primary_outcomes", ("outcomesModule", "primaryOutcomes"), list),
    ("secondary_outcomes", ("outcomesModule", "secondaryOutcomes"), list),
    ("eligibility", ("eligibilityModule",), dict),
)


def _lookup(record, path, default):
    """Follow the keys in *path* into *record*; return *default* if a step is missing."""
    node = record
    try:
        for key in path:
            node = node[key]
    except (KeyError, IndexError, TypeError):
        # KeyError: key absent; TypeError/IndexError: an intermediate value
        # is not a mapping (e.g. None, a string or a list).
        return default
    return node


def process_json(json_file, output_file='output.json'):
    """Reduce a dump of study records to the fields used downstream.

    Reads ``json_file``, keeps a fixed set of fields from each record's
    ``protocolSection`` and writes the reduced records as a JSON list to
    ``output_file``. A field that cannot be found falls back to an empty
    string, list or dict (depending on the field) rather than failing the
    whole run.

    Args:
        json_file: Path of the JSON file to read. It may hold a list of
            study records or a single study record (a dict), which is
            treated as a one-element list.
        output_file: Path the reduced records are written to.

    Returns:
        The list of reduced records that was written to ``output_file``.
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Iterating a dict would yield its keys, producing one empty record per
    # key; a lone study is wrapped so it is processed as a record.
    if isinstance(data, dict):
        data = [data]

    filtered_data = [
        {
            # make_default() is called per field so records never share a
            # mutable fallback list/dict.
            name: _lookup(item, ("protocolSection",) + path, make_default())
            for name, path, make_default in _STUDY_FIELDS
        }
        for item in data
    ]

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(filtered_data, f, indent=4)

    return filtered_data

def llm_config():
    """Build the prompt -> GPT-4 chain that extracts a structured summary of the trials.

    The chain takes ``{"input": ...}`` and produces an instance of the local
    ``Classification`` model, whose ``get_dict()`` method exposes the
    extracted fields as a plain dict.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable so that no credential lives in source control.

    Returns:
        The runnable formed by piping the tagging prompt into the
        structured-output LLM.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set or is empty.
    """
    tagging_prompt = ChatPromptTemplate.from_template(
        """
    Extract the desired information from the following list of JSON clinical trials.

    Only extract the properties mentioned in the 'Classification' function.

    Passage:
    {input}

    """
    )

    # Schema handed to the model for structured output. The field
    # descriptions are part of what the model sees, so they are instructions
    # rather than documentation. (Deliberately no class docstring: it would
    # be picked up as the schema description.)
    class Classification(BaseModel):
        description: str = Field(description= "text description grouping all the clinical trials using brief_description and detailed_description keys")
        project_title: list = Field(description="Extract the project title of all the clinical trials")
        status: list= Field(description="Extract the status of all the clinical trials")
        keywords: list= Field(description="Extract the most relevant keywords regrouping all the clinical trials")
        interventions: list= Field(description="describe the interventions for each clinical trial using title, name and description")
        primary_outcomes: list= Field(description= "get the primary outcomes of each clinical trial")
        # secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
        eligibility: list= Field(description= "get the eligibilityCriteria grouping all the clinical trials")
        # healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
        minimum_age: list = Field(description="get the minimum age from each experiment")
        maximum_age: list = Field(description="get the maximum age from each experiment")
        gender: list = Field(description="get the gender from each experiment")

        def get_dict(self):
            # Plain-dict view of the result; 'description' is exposed under
            # the key 'summary'.
            return {
                "summary": self.description,
                "project_title": self.project_title,
                "status": self.status,
                "keywords": self.keywords,
                "interventions": self.interventions,
                "primary_outcomes": self.primary_outcomes,
                # "secondary_outcomes": self.secondary_outcomes,
                "eligibility": self.eligibility,
                # "healthy_volunteers": self.healthy_volunteers,
                "minimum_age": self.minimum_age,
                "maximum_age": self.maximum_age,
                "gender": self.gender
            }

    # SECURITY: the credential must come from the environment, never from
    # source. Any key that was ever committed to this file should be treated
    # as leaked and revoked.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY environment variable is not set; "
            "export it before running the tagging chain."
        )

    # LLM
    llm = ChatOpenAI(
        temperature=0.6,
        model="gpt-4",
        openai_api_key=api_key
    ).with_structured_output(
        Classification
    )

    tagging_chain = tagging_prompt | llm

    return tagging_chain

def get_llm_results(results):
    """Return the plain-dict form of a chain result.

    Args:
        results: Object exposing a ``get_dict()`` method.

    Returns:
        Whatever ``results.get_dict()`` returns.
    """
    return results.get_dict()

def save_llm_results(results_json):
    """Write ``results_json`` to ``llm_results.json`` in the current working directory.

    The file is overwritten if it exists and is formatted with a 4-space
    indent.

    Args:
        results_json: JSON-serialisable object to store.
    """
    with open('llm_results.json', 'w') as out_file:
        json.dump(results_json, out_file, indent=4)
    
# clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])
# print(clinical_record_info)

# with open('data.json', 'w') as f:
#     json.dump(clinical_record_info, f, indent=4)

# Script entry point: reduce the raw study dump, run the tagging chain over
# the reduced records, then save and print the structured result.
# Guarded so that merely importing this module neither touches the
# filesystem nor triggers an OpenAI API call.
if __name__ == "__main__":
    # change the json file here and run it to get the output
    json_file= "D:/HACKUPC/hupc/klinic/data.json"
    process_json(json_file)

    # process_json writes its result to 'output.json'; read it back as the
    # input passage for the LLM.
    with open('output.json', 'r') as file:
        data = json.load(file)

    tagging_chain= llm_config()
    res= tagging_chain.invoke({"input": data})
    result_json= get_llm_results(res)
    save_llm_results(result_json)
    print(result_json)