File size: 4,663 Bytes
a84a65c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import pandas as pd
import requests
import argparse
import os
from openai import OpenAI
# API credential for the OpenAI-compatible endpoint. Read from the environment
# so that no secret is committed to the source tree; empty string when the
# OPENAI_API_KEY variable is not set.
openai_key = os.environ.get('OPENAI_API_KEY', '')

# def get_struct(caption):
#     headers = {
#         'Content-Type': 'application/json',
#         'Authorization': f'{openai_key}',
#     }

#     json_data = {
#         'model': 'gpt-3.5-turbo',
#         'messages': [
#             {
#                 'role': 'user',
#                 'content':f'I want to know what sound might be in the given scene and you need to give me the results in the following format:\
#                 Question: A bird sings on the river in the morning, a cow passes by and scares away the bird.\
#                 Answer: <running water& all>@<birds chriping& start>@<cow footsteps& mid>@<birds flying away& end>.\
#                 Question: cellphone ringing a variety of tones followed by a loud explosion and fire crackling as a truck engine runs idle\
#                 Answer: <variety cellphone ringing tones& start>@<loud explosion& end>@<fire crackling& end>@<truck engine idle& end>\
#                 Question: Train passing followed by short honks three times \
#                 Answer: <train passing& all>@<short honks three times& end>\
#                 All indicates the sound exists in the whole scene \
#                 Start, mid, end indicates the time period the sound appear.\
#                 Question: {caption} \
#                 Answer:',
#             },
#         ],
#         'temperature': 0.0,
#     }

#     response = requests.post('https://api.openai.com/v1/chat/completions', headers=headers, json=json_data)
#     return eval(response.content)['choices'][0]["message"]["content"]


def get_struct(caption):
    """Ask the chat model to turn a scene caption into a structured caption.

    The model is shown three worked examples and asked to answer in the form
    ``<sound& time>@<sound& time>...`` where ``time`` is one of ``all``,
    ``start``, ``mid`` or ``end``.

    Args:
        caption: Free-text description of the scene; interpolated verbatim
            into the prompt.

    Returns:
        The ``message.content`` of the first choice in the completion.

    Raises:
        Whatever the OpenAI client raises for request failures; errors are
        not handled here.
    """
    # One prompt line per list entry, joined with real newlines. Backslash
    # line continuations inside a quoted literal would glue the lines together
    # and embed the source indentation in the prompt.
    prompt = '\n'.join([
        'I want to know what sound might be in the given scene and you need to give me the results in the following format:',
        'Question: A bird sings on the river in the morning, a cow passes by and scares away the bird.',
        'Answer: <running water& all>@<birds chriping& start>@<cow footsteps& mid>@<birds flying away& end>.',
        'Question: cellphone ringing a variety of tones followed by a loud explosion and fire crackling as a truck engine runs idle',
        'Answer: <variety cellphone ringing tones& start>@<loud explosion& end>@<fire crackling& end>@<truck engine idle& end>',
        'Question: Train passing followed by short honks three times',
        'Answer: <train passing& all>@<short honks three times& end>',
        'All indicates the sound exists in the whole scene',
        'Start, mid, end indicates the time period the sound appear.',
        f'Question: {caption}',
        'Answer:',
    ])

    client = OpenAI(
        base_url='https://api.openai-proxy.org/v1',
        # Prefer the environment so the credential is not duplicated here;
        # fall back to the module-level `openai_key` when it is unset/empty.
        api_key=os.environ.get('OPENAI_API_KEY') or openai_key,
    )
    chat_completion = client.chat.completions.create(
        messages=[{'role': 'user', 'content': prompt}],
        model="gpt-3.5-turbo",
    )
    answer = chat_completion.choices[0].message.content

    # Progress/debug output: full response object, then just the reply text.
    print(chat_completion)
    print(answer)

    return answer

def parse_args():
    """Parse the command-line arguments of the script.

    Returns:
        argparse.Namespace with a single attribute, ``tsv_path`` (str).

    Raises:
        SystemExit: raised by argparse when ``--tsv_path`` is missing or the
            arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser()
    # Required: the script cannot do anything without an input table, and a
    # missing value would otherwise surface later as an obscure read error.
    parser.add_argument(
        "--tsv_path",
        type=str,
        required=True,
        help="path to the tab-separated input file; must have a 'caption' column",
    )
    return parser.parse_args()


def normalize_struct_caption(strcut_cap, ori_caption):
    """Return a one-line structured caption that is safe to store in a TSV cell.

    Args:
        strcut_cap: Raw reply from the model (may be None or empty).
        ori_caption: The original caption the reply was generated from.

    Returns:
        The reply with tabs and line breaks replaced by spaces and the ends
        trimmed. If the reply is empty or contains "sorry" (the model
        declined), returns ``<{lower-cased original caption}& all>`` instead,
        using the same ``&`` separator as the prompt's answer format.
    """
    def _one_line(text):
        # Tabs and newlines would corrupt the tab-separated output files.
        for ch in ('\r', '\n', '\t'):
            text = text.replace(ch, ' ')
        return text.strip()

    cleaned = _one_line(strcut_cap or '')
    if not cleaned or 'sorry' in cleaned.lower():
        return f'<{_one_line(str(ori_caption)).lower()}& all>'
    return cleaned


if __name__ == '__main__':
    import time  # local import: only the retry back-off below needs it

    args = parse_args()
    tsv_path = args.tsv_path
    ori_df = pd.read_csv(tsv_path, sep='\t')
    # Output files are named after the input file and written to the CWD.
    name = os.path.splitext(os.path.basename(tsv_path))[0]

    id2cap = {}
    # `{name}.txt` is a progress log: one "<row index>\t<structured caption>"
    # line per processed row, flushed after every row.
    with open(f'{name}.txt', 'w') as f:
        for t in ori_df.itertuples():
            index = int(t.Index)
            ori_caption = getattr(t, 'caption')

            # Retry the same row until the request succeeds. Only `Exception`
            # is caught so Ctrl-C still stops the script, and the delay grows
            # up to one minute so a failing endpoint is not hammered.
            delay = 1.0
            while True:
                try:
                    strcut_cap = get_struct(ori_caption)
                    break
                except Exception as exc:
                    print(f'error on row {index}: {exc!r}; retrying in {delay:.0f}s')
                    time.sleep(delay)
                    delay = min(delay * 2, 60.0)

            strcut_cap = normalize_struct_caption(strcut_cap, ori_caption)
            id2cap[index] = strcut_cap
            f.write(f'{index}\t{strcut_cap}\n')
            f.flush()

    # Attach the captions to the table that was actually read from
    # `tsv_path` (not a same-named file in the current directory).
    ori_df['struct_cap'] = ori_df.index.map(id2cap)
    ori_df.to_csv(f'{name}_struct.tsv', sep='\t', index=False)