File size: 4,848 Bytes
dfbe641
 
 
1d1bc23
8df9ec0
dfbe641
85c676c
 
822b7d2
 
66bbbb0
 
 
f10d1e0
f371fff
694ef2b
 
ce56c40
eb9264d
d21a4cc
8df9ec0
 
 
d21a4cc
8df9ec0
 
 
 
 
 
 
85c676c
8df9ec0
 
85c676c
 
3d891e3
85c676c
 
 
3d891e3
85c676c
 
 
 
f10d1e0
85c676c
 
8df9ec0
694ef2b
0134dcf
8df9ec0
 
 
2b72a19
 
8df9ec0
2b72a19
 
 
8df9ec0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85c676c
 
 
8df9ec0
85c676c
 
d21a4cc
a0b5df2
 
 
6f4097c
85c676c
6f4097c
 
 
85c676c
 
 
1d1bc23
e361379
1d1bc23
85c676c
 
df404d2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import numpy as np
import requests
import streamlit as st
import openai
import json

def main():
    """Render the question-generation page and query every configured model.

    Shows a title, an introductory description and a text area. When the text
    area is non-empty, one generated question per model is written to the page:

    * each endpoint in ``AWS_checkpoints`` (HTTP GET with an ``x-api-key`` header),
    * ``gpt-3.5-turbo`` through ``openai.ChatCompletion``,
    * each checkpoint in ``HF_checkpoints`` through the Hugging Face Inference API.

    Credentials are read from ``st.secrets`` under the keys ``HF_token``,
    ``OpenAI_token`` and ``aws-key``. A failure of one AWS or Hugging Face
    model is reported on the page and does not prevent the remaining models
    from being queried.
    """
    st.title("Scientific Question Generation")

    st.write("This application is designed to generate a question given a piece of scientific text.\
    We include the output from four different models, the [BART-Large](https://huggingface.co/dhmeltzer/bart-large_askscience-qg) and [FLAN-T5-Base](https://huggingface.co/dhmeltzer/flan-t5-base_askscience-qg) models \
    fine-tuned on the r/AskScience split of the [ELI5 dataset](https://huggingface.co/datasets/eli5) as well as the zero-shot output \
    of the [FLAN-T5-XXL](https://huggingface.co/google/flan-t5-xxl) model and the [GPT-3.5-turbo](https://platform.openai.com/docs/models/gpt-3-5) model.\
    For a more thorough discussion of question generation see this [report](https://wandb.ai/dmeltzer/Question_Generation/reports/Exploratory-Data-Analysis-for-r-AskScience--Vmlldzo0MjQwODg1?accessToken=fndbu2ar26mlbzqdphvb819847qqth2bxyi4hqhugbnv97607mj01qc7ed35v6w8) for EDA on the r/AskScience dataset and this \
    [report](https://api.wandb.ai/links/dmeltzer/7an677es) for details on our training procedure.\
    \n\nThe two fine-tuned models (BART-Large and FLAN-T5-Base) are hosted on AWS using a combination of AWS Sagemaker, Lambda, and API gateway.\
    GPT-3.5 is called using the OpenAI API and the FLAN-T5-XXL model is hosted by HuggingFace and is called with their Inference API.\
    \n \n **Disclaimer**: When first running this application it may take approximately 30 seconds for the first two responses to load because of the cold start problem with AWS Lambda.\
    If this happens, please re-enter the input to call the model again and the models will respond quicker on any subsequent calls.")

    # Display name -> API Gateway URL of each fine-tuned model hosted on AWS.
    AWS_checkpoints = {
        'BART-Large': 'https://8hlnvys7bh.execute-api.us-east-1.amazonaws.com/beta/',
        'FLAN-T5-Base': 'https://gnrxh05827.execute-api.us-east-1.amazonaws.com/beta/',
    }

    # Right now HF_checkpoints just consists of FLAN-T5-XXL but we may add more models later.
    HF_checkpoints = ['google/flan-t5-xxl']

    # Token to access HF inference API
    HF_headers = {"Authorization": f"Bearer {st.secrets['HF_token']}"}

    # Token to access OpenAI API
    openai.api_key = st.secrets['OpenAI_token']

    # Shown whenever a model could not be reached or returned an unusable body.
    error_template = '**{}**: There was an error when calling the model. Please resubmit the question.'

    # Used to query models hosted on Huggingface
    def query(checkpoint, payload):
        """POST ``payload`` to the Inference API for ``checkpoint`` and return the decoded JSON body."""
        API_URL = f"https://api-inference.huggingface.co/models/{checkpoint}"

        # Must carry the Hugging Face bearer token, not the AWS API key.
        response = requests.post(API_URL,
                                 headers=HF_headers,
                                 json=payload)

        return response.json()

    # User search
    user_input = st.text_area("Question Generator",
                              """Black holes can evaporate by emitting Hawking radiation.""")

    if not user_input:
        return

    # --- Fine-tuned models hosted on AWS -----------------------------------
    # The request body and headers are the same for every endpoint, so build
    # them once instead of on each iteration.
    aws_headers = {'x-api-key': st.secrets['aws-key']}
    input_data = json.dumps({'inputs': user_input})

    for name, url in AWS_checkpoints.items():
        try:
            r = requests.get(url, data=input_data, headers=aws_headers)
            output = r.json()[0]['generated_text']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
            # Network failure, non-JSON body, or JSON that is not shaped like
            # [{'generated_text': ...}] (e.g. a gateway error object).
            st.write(error_template.format(name))
        else:
            st.write(f'**{name}**: {output}')

    # --- GPT-3.5 through the OpenAI API ------------------------------------
    model_engine = "gpt-3.5-turbo"
    # Max tokens to produce
    max_tokens = 50

    # Prompt GPT-3.5 with an explicit question
    prompt = f"generate a question: {user_input}"

    # We give GPT-3.5 a message so it knows to generate questions from text.
    response = openai.ChatCompletion.create(
        model=model_engine,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that generates questions from text."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=max_tokens,
    )

    output = response['choices'][0]['message']['content']
    st.write(f'**{model_engine}**: {output}')

    # --- Zero-shot models through the Hugging Face Inference API -----------
    for checkpoint in HF_checkpoints:

        model_name = checkpoint.split('/')[1]

        # For FLAN models we need to give them instructions explicitly.
        if 'flan' in model_name.lower():
            prompt = 'generate a question: ' + user_input
        else:
            prompt = user_input

        try:
            # ``wait_for_model`` is a request option, so it is nested under
            # ``options`` rather than sent next to ``inputs``.
            result = query(checkpoint, {
                "inputs": prompt,
                "options": {"wait_for_model": True}})
        except (requests.RequestException, ValueError):
            # Network failure or a response body that is not valid JSON.
            st.write(error_template.format(model_name))
            continue

        try:
            output = result[0]['generated_text']
        except (KeyError, IndexError, TypeError):
            # Show the raw payload (typically an error object) and move on to
            # the next checkpoint instead of aborting the whole page.
            st.write(result)
            continue

        st.write(f'**{model_name}**: {output}')
        
# Invoke main() only when this file is executed directly rather than imported.
if __name__ == "__main__":
    main()