Spaces:
Running
Running
seawolf2357
commited on
Commit
•
c4be42e
1
Parent(s):
13feae4
Update app.py
Browse files
app.py
CHANGED
@@ -10,19 +10,20 @@ from datasets import load_dataset
|
|
10 |
print("Current Working Directory:", os.getcwd())
|
11 |
|
12 |
# 데이터셋 파일 이름
|
13 |
-
|
14 |
|
15 |
-
# 현재 작업 디렉토리에 파일이 있는지 확인
|
16 |
-
if os.path.exists(
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
# 작업 디렉토리 변경 (필요한 경우)
|
21 |
os.chdir('/home/user/app')
|
22 |
print("Changed directory to:", os.getcwd())
|
|
|
|
|
23 |
|
24 |
# 데이터셋 로드
|
25 |
-
law_dataset = load_dataset('csv', data_files=
|
26 |
print("Dataset loaded successfully.")
|
27 |
|
28 |
# 로깅 설정
|
@@ -44,9 +45,6 @@ SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
|
|
44 |
# 대화 히스토리를 저장할 전역 변수
|
45 |
conversation_history = []
|
46 |
|
47 |
-
# 법률 데이터셋 로드
|
48 |
-
law_dataset = load_dataset('csv', data_files='train_0.csv')
|
49 |
-
|
50 |
class MyClient(discord.Client):
|
51 |
def __init__(self, *args, **kwargs):
|
52 |
super().__init__(*args, **kwargs)
|
@@ -68,14 +66,12 @@ class MyClient(discord.Client):
|
|
68 |
self.is_processing = True
|
69 |
try:
|
70 |
response = await generate_response(message)
|
71 |
-
# 비어 있는 응답을 확인하고 처리
|
72 |
if response.strip() == "":
|
73 |
response = "μ£μ‘ν©λλ€, μ 곡ν μ μλ μ λ³΄κ° μμ΅λλ€."
|
74 |
await message.channel.send(response)
|
75 |
finally:
|
76 |
self.is_processing = False
|
77 |
|
78 |
-
|
79 |
def is_message_in_specific_channel(self, message):
|
80 |
return message.channel.id == SPECIFIC_CHANNEL_ID or (
|
81 |
isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
|
@@ -89,26 +85,22 @@ async def generate_response(message):
|
|
89 |
|
90 |
# 데이터 검색 및 응답 준비
|
91 |
answer = search_in_dataset(user_input, law_dataset)
|
92 |
-
|
|
|
93 |
|
94 |
-
|
95 |
max_length = 2000
|
96 |
if len(full_response_text) > max_length:
|
97 |
-
# 너무 긴 메시지를 여러 부분으로 나누어 보냅니다.
|
98 |
for i in range(0, len(full_response_text), max_length):
|
99 |
part_response = full_response_text[i:i+max_length]
|
100 |
await message.channel.send(part_response)
|
101 |
else:
|
102 |
-
# 메시지 길이가 적절하면 한 번에 전송
|
103 |
await message.channel.send(full_response_text)
|
104 |
|
105 |
logging.debug(f'Full model response sent: {full_response_text}')
|
106 |
conversation_history.append({"role": "assistant", "content": full_response_text})
|
107 |
|
108 |
-
|
109 |
def search_in_dataset(query, dataset):
|
110 |
-
# 간단한 검색 로직을 구현합니다.
|
111 |
-
# 여기에서는 예제로 단순화하기 위해 첫 번째 항목을 반환합니다.
|
112 |
for record in dataset['train']:
|
113 |
if query in record['μ¬κ±΄λͺ
']:
|
114 |
return record['μ¬κ±΄λ²νΈ']
|
|
|
10 |
print("Current Working Directory:", os.getcwd())
|
11 |
|
12 |
# 데이터셋 파일 이름
|
13 |
+
data_files = ['train_0.csv', 'train_1.csv', 'train_2.csv', 'train_3.csv', 'train_4.csv', 'train_5.csv']
|
14 |
|
15 |
+
# 현재 작업 디렉토리에 모든 파일이 있는지 확인
|
16 |
+
missing_files = [file for file in data_files if not os.path.exists(file)]
|
17 |
+
if missing_files:
|
18 |
+
print(f"Missing files: {missing_files}")
|
19 |
+
# 필요한 경우 작업 디렉토리 변경
|
|
|
20 |
os.chdir('/home/user/app')
|
21 |
print("Changed directory to:", os.getcwd())
|
22 |
+
else:
|
23 |
+
print("All files are present in the current directory.")
|
24 |
|
25 |
# 데이터셋 로드
|
26 |
+
law_dataset = load_dataset('csv', data_files=data_files)
|
27 |
print("Dataset loaded successfully.")
|
28 |
|
29 |
# 로깅 설정
|
|
|
45 |
# 대화 히스토리를 저장할 전역 변수
|
46 |
conversation_history = []
|
47 |
|
|
|
|
|
|
|
48 |
class MyClient(discord.Client):
|
49 |
def __init__(self, *args, **kwargs):
|
50 |
super().__init__(*args, **kwargs)
|
|
|
66 |
self.is_processing = True
|
67 |
try:
|
68 |
response = await generate_response(message)
|
|
|
69 |
if response.strip() == "":
|
70 |
response = "μ£μ‘ν©λλ€, μ 곡ν μ μλ μ λ³΄κ° μμ΅λλ€."
|
71 |
await message.channel.send(response)
|
72 |
finally:
|
73 |
self.is_processing = False
|
74 |
|
|
|
75 |
def is_message_in_specific_channel(self, message):
|
76 |
return message.channel.id == SPECIFIC_CHANNEL_ID or (
|
77 |
isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
|
|
|
85 |
|
86 |
# 데이터 검색 및 응답 준비
|
87 |
answer = search_in_dataset(user_input, law_dataset)
|
88 |
+
if not answer:
|
89 |
+
answer = "κ΄λ ¨ λ²λ₯ μ 보λ₯Ό μ°Ύμ μ μμ΅λλ€."
|
90 |
|
91 |
+
full_response_text = system_message + "\n\n" + answer
|
92 |
max_length = 2000
|
93 |
if len(full_response_text) > max_length:
|
|
|
94 |
for i in range(0, len(full_response_text), max_length):
|
95 |
part_response = full_response_text[i:i+max_length]
|
96 |
await message.channel.send(part_response)
|
97 |
else:
|
|
|
98 |
await message.channel.send(full_response_text)
|
99 |
|
100 |
logging.debug(f'Full model response sent: {full_response_text}')
|
101 |
conversation_history.append({"role": "assistant", "content": full_response_text})
|
102 |
|
|
|
103 |
def search_in_dataset(query, dataset):
|
|
|
|
|
104 |
for record in dataset['train']:
|
105 |
if query in record['μ¬κ±΄λͺ
']:
|
106 |
return record['μ¬κ±΄λ²νΈ']
|