PCFISH committed on
Commit
f1ec1d5
•
1 Parent(s): ec113e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -5
app.py CHANGED
@@ -59,13 +59,23 @@ def get_json_file(docs):
59
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
60
  def get_text_chunks(documents):
61
  text_splitter = RecursiveCharacterTextSplitter(
62
- chunk_size=1000, # 청크의 크기를 지정합니다.
63
- chunk_overlap=200, # 청크 사이의 중복을 지정합니다.
64
- length_function=len # 텍스트의 길이를 측정하는 함수를 지정합니다.
65
  )
66
 
67
- documents = text_splitter.split_documents(documents) # 문서들을 청크로 나눕니다
68
- return documents # 나눈 청크를 반환합니다.
 
 
 
 
 
 
 
 
 
 
69
 
70
 
71
  # 텍스트 청크들로부터 벡터 스토어를 생성하는 함수입니다.
 
59
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
60
  def get_text_chunks(documents):
61
  text_splitter = RecursiveCharacterTextSplitter(
62
+ chunk_size=1000,
63
+ chunk_overlap=200,
64
+ length_function=len
65
  )
66
 
67
+ # 각 문서의 내용을 리스트에 추가
68
+ texts = []
69
+ for doc in documents:
70
+ if isinstance(doc, str):
71
+ # 문자열인 경우 그대로 추가
72
+ texts.append(doc)
73
+ else:
74
+ # 그 외의 경우에는 문서의 내용을 추가
75
+ texts.append(doc.page_content)
76
+
77
+ # ๋‚˜๋ˆˆ ์ฒญํฌ๋ฅผ ๋ฐ˜ํ™˜
78
+ return text_splitter.split_documents(texts)
79
 
80
 
81
  # 텍스트 청크들로부터 벡터 스토어를 생성하는 함수입니다.