dsmultimedika committed on
Commit
708e223
1 Parent(s): 420855c

Init application

This view is limited to 50 files because it contains too many changes.
.gitignore ADDED
@@ -0,0 +1,402 @@
1
+ ## Ignore Visual Studio temporary files, build results, and
2
+ ## files generated by popular Visual Studio add-ons.
3
+ ##
4
+ ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
5
+
6
+ # User-specific files
7
+ *.rsuser
8
+ *.suo
9
+ *.user
10
+ *.userosscache
11
+ *.sln.docstates
12
+
13
+ # User-specific files (MonoDevelop/Xamarin Studio)
14
+ *.userprefs
15
+
16
+ # Mono auto generated files
17
+ mono_crash.*
18
+
19
+ # Build results
20
+ [Dd]ebug/
21
+ [Dd]ebugPublic/
22
+ [Rr]elease/
23
+ [Rr]eleases/
24
+ x64/
25
+ x86/
26
+ [Ww][Ii][Nn]32/
27
+ [Aa][Rr][Mm]/
28
+ [Aa][Rr][Mm]64/
29
+ bld/
30
+ [Bb]in/
31
+ [Oo]bj/
32
+ [Ll]og/
33
+ [Ll]ogs/
34
+
35
+ # Visual Studio 2015/2017 cache/options directory
36
+ .vs/
37
+ # Uncomment if you have tasks that create the project's static files in wwwroot
38
+ #wwwroot/
39
+
40
+ # Visual Studio 2017 auto generated files
41
+ Generated\ Files/
42
+
43
+ # MSTest test Results
44
+ [Tt]est[Rr]esult*/
45
+ [Bb]uild[Ll]og.*
46
+
47
+ # NUnit
48
+ *.VisualState.xml
49
+ TestResult.xml
50
+ nunit-*.xml
51
+
52
+ # Build Results of an ATL Project
53
+ [Dd]ebugPS/
54
+ [Rr]eleasePS/
55
+ dlldata.c
56
+
57
+ # Benchmark Results
58
+ BenchmarkDotNet.Artifacts/
59
+
60
+ # .NET Core
61
+ project.lock.json
62
+ project.fragment.lock.json
63
+ artifacts/
64
+
65
+ # ASP.NET Scaffolding
66
+ ScaffoldingReadMe.txt
67
+
68
+ # StyleCop
69
+ StyleCopReport.xml
70
+
71
+ # Files built by Visual Studio
72
+ *_i.c
73
+ *_p.c
74
+ *_h.h
75
+ *.ilk
76
+ *.meta
77
+ *.obj
78
+ *.iobj
79
+ *.pch
80
+ *.pdb
81
+ *.ipdb
82
+ *.pgc
83
+ *.pgd
84
+ *.rsp
85
+ *.sbr
86
+ *.tlb
87
+ *.tli
88
+ *.tlh
89
+ *.tmp
90
+ *.tmp_proj
91
+ *_wpftmp.csproj
92
+ *.log
93
+ *.tlog
94
+ *.vspscc
95
+ *.vssscc
96
+ .builds
97
+ *.pidb
98
+ *.svclog
99
+ *.scc
100
+
101
+ # Chutzpah Test files
102
+ _Chutzpah*
103
+
104
+ # Visual C++ cache files
105
+ ipch/
106
+ *.aps
107
+ *.ncb
108
+ *.opendb
109
+ *.opensdf
110
+ *.sdf
111
+ *.cachefile
112
+ *.VC.db
113
+ *.VC.VC.opendb
114
+
115
+ # Visual Studio profiler
116
+ *.psess
117
+ *.vsp
118
+ *.vspx
119
+ *.sap
120
+
121
+ # Visual Studio Trace Files
122
+ *.e2e
123
+
124
+ # TFS 2012 Local Workspace
125
+ $tf/
126
+
127
+ # Guidance Automation Toolkit
128
+ *.gpState
129
+
130
+ # ReSharper is a .NET coding add-in
131
+ _ReSharper*/
132
+ *.[Rr]e[Ss]harper
133
+ *.DotSettings.user
134
+
135
+ # TeamCity is a build add-in
136
+ _TeamCity*
137
+
138
+ # DotCover is a Code Coverage Tool
139
+ *.dotCover
140
+
141
+ # AxoCover is a Code Coverage Tool
142
+ .axoCover/*
143
+ !.axoCover/settings.json
144
+
145
+ # Coverlet is a free, cross platform Code Coverage Tool
146
+ coverage*.json
147
+ coverage*.xml
148
+ coverage*.info
149
+
150
+ # Visual Studio code coverage results
151
+ *.coverage
152
+ *.coveragexml
153
+
154
+ # NCrunch
155
+ _NCrunch_*
156
+ .*crunch*.local.xml
157
+ nCrunchTemp_*
158
+
159
+ # MightyMoose
160
+ *.mm.*
161
+ AutoTest.Net/
162
+
163
+ # Web workbench (sass)
164
+ .sass-cache/
165
+
166
+ # Installshield output folder
167
+ [Ee]xpress/
168
+
169
+ # DocProject is a documentation generator add-in
170
+ DocProject/buildhelp/
171
+ DocProject/Help/*.HxT
172
+ DocProject/Help/*.HxC
173
+ DocProject/Help/*.hhc
174
+ DocProject/Help/*.hhk
175
+ DocProject/Help/*.hhp
176
+ DocProject/Help/Html2
177
+ DocProject/Help/html
178
+
179
+ # Click-Once directory
180
+ publish/
181
+
182
+ # Publish Web Output
183
+ *.[Pp]ublish.xml
184
+ *.azurePubxml
185
+ # Note: Comment the next line if you want to checkin your web deploy settings,
186
+ # but database connection strings (with potential passwords) will be unencrypted
187
+ *.pubxml
188
+ *.publishproj
189
+
190
+ # Microsoft Azure Web App publish settings. Comment the next line if you want to
191
+ # checkin your Azure Web App publish settings, but sensitive information contained
192
+ # in these scripts will be unencrypted
193
+ PublishScripts/
194
+
195
+ # NuGet Packages
196
+ *.nupkg
197
+ # NuGet Symbol Packages
198
+ *.snupkg
199
+ # The packages folder can be ignored because of Package Restore
200
+ **/[Pp]ackages/*
201
+ # except build/, which is used as an MSBuild target.
202
+ !**/[Pp]ackages/build/
203
+ # Uncomment if necessary however generally it will be regenerated when needed
204
+ #!**/[Pp]ackages/repositories.config
205
+ # NuGet v3's project.json files produces more ignorable files
206
+ *.nuget.props
207
+ *.nuget.targets
208
+
209
+ # Microsoft Azure Build Output
210
+ csx/
211
+ *.build.csdef
212
+
213
+ # Microsoft Azure Emulator
214
+ ecf/
215
+ rcf/
216
+
217
+ # Windows Store app package directories and files
218
+ AppPackages/
219
+ BundleArtifacts/
220
+ Package.StoreAssociation.xml
221
+ _pkginfo.txt
222
+ *.appx
223
+ *.appxbundle
224
+ *.appxupload
225
+
226
+ # Visual Studio cache files
227
+ # files ending in .cache can be ignored
228
+ *.[Cc]ache
229
+ # but keep track of directories ending in .cache
230
+ !?*.[Cc]ache/
231
+
232
+ # Others
233
+ ClientBin/
234
+ ~$*
235
+ *~
236
+ *.dbmdl
237
+ *.dbproj.schemaview
238
+ *.jfm
239
+ *.pfx
240
+ *.publishsettings
241
+ orleans.codegen.cs
242
+
243
+ # Including strong name files can present a security risk
244
+ # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245
+ #*.snk
246
+
247
+ # Since there are multiple workflows, uncomment next line to ignore bower_components
248
+ # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249
+ #bower_components/
250
+
251
+ # RIA/Silverlight projects
252
+ Generated_Code/
253
+
254
+ # Backup & report files from converting an old project file
255
+ # to a newer Visual Studio version. Backup files are not needed,
256
+ # because we have git ;-)
257
+ _UpgradeReport_Files/
258
+ Backup*/
259
+ UpgradeLog*.XML
260
+ UpgradeLog*.htm
261
+ ServiceFabricBackup/
262
+ *.rptproj.bak
263
+
264
+ # SQL Server files
265
+ *.mdf
266
+ *.ldf
267
+ *.ndf
268
+
269
+ # Business Intelligence projects
270
+ *.rdl.data
271
+ *.bim.layout
272
+ *.bim_*.settings
273
+ *.rptproj.rsuser
274
+ *- [Bb]ackup.rdl
275
+ *- [Bb]ackup ([0-9]).rdl
276
+ *- [Bb]ackup ([0-9][0-9]).rdl
277
+
278
+ # Microsoft Fakes
279
+ FakesAssemblies/
280
+
281
+ # GhostDoc plugin setting file
282
+ *.GhostDoc.xml
283
+
284
+ # Node.js Tools for Visual Studio
285
+ .ntvs_analysis.dat
286
+ node_modules/
287
+
288
+ # Visual Studio 6 build log
289
+ *.plg
290
+
291
+ # Visual Studio 6 workspace options file
292
+ *.opt
293
+
294
+ # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295
+ *.vbw
296
+
297
+ # Visual Studio 6 auto-generated project file (contains which files were open etc.)
298
+ *.vbp
299
+
300
+ # Visual Studio 6 workspace and project file (working project files containing files to include in project)
301
+ *.dsw
302
+ *.dsp
303
+
304
+ # Visual Studio 6 technical files
305
+ *.ncb
306
+ *.aps
307
+
308
+ # Visual Studio LightSwitch build output
309
+ **/*.HTMLClient/GeneratedArtifacts
310
+ **/*.DesktopClient/GeneratedArtifacts
311
+ **/*.DesktopClient/ModelManifest.xml
312
+ **/*.Server/GeneratedArtifacts
313
+ **/*.Server/ModelManifest.xml
314
+ _Pvt_Extensions
315
+
316
+ # Paket dependency manager
317
+ .paket/paket.exe
318
+ paket-files/
319
+
320
+ # FAKE - F# Make
321
+ .fake/
322
+
323
+ # CodeRush personal settings
324
+ .cr/personal
325
+
326
+ # Python Tools for Visual Studio (PTVS)
327
+ __pycache__/
328
+ *.pyc
329
+
330
+ # Cake - Uncomment if you are using it
331
+ # tools/**
332
+ # !tools/packages.config
333
+
334
+ # Tabs Studio
335
+ *.tss
336
+
337
+ # Telerik's JustMock configuration file
338
+ *.jmconfig
339
+
340
+ # BizTalk build output
341
+ *.btp.cs
342
+ *.btm.cs
343
+ *.odx.cs
344
+ *.xsd.cs
345
+
346
+ # OpenCover UI analysis results
347
+ OpenCover/
348
+
349
+ # Azure Stream Analytics local run output
350
+ ASALocalRun/
351
+
352
+ # MSBuild Binary and Structured Log
353
+ *.binlog
354
+
355
+ # NVidia Nsight GPU debugger configuration file
356
+ *.nvuser
357
+
358
+ # MFractors (Xamarin productivity tool) working folder
359
+ .mfractor/
360
+
361
+ # Local History for Visual Studio
362
+ .localhistory/
363
+
364
+ # Visual Studio History (VSHistory) files
365
+ .vshistory/
366
+
367
+ # BeatPulse healthcheck temp database
368
+ healthchecksdb
369
+
370
+ # Backup folder for Package Reference Convert tool in Visual Studio 2017
371
+ MigrationBackup/
372
+
373
+ # Ionide (cross platform F# VS Code tools) working folder
374
+ .ionide/
375
+
376
+ # Fody - auto-generated XML schema
377
+ FodyWeavers.xsd
378
+
379
+ # VS Code files for those working on multiple tools
380
+ .vscode/*
381
+ !.vscode/settings.json
382
+ !.vscode/tasks.json
383
+ !.vscode/launch.json
384
+ !.vscode/extensions.json
385
+ *.code-workspace
386
+
387
+ # Local History for Visual Studio Code
388
+ .history/
389
+
390
+ # Windows Installer files from build outputs
391
+ *.cab
392
+ *.msi
393
+ *.msix
394
+ *.msm
395
+ *.msp
396
+
397
+ # JetBrains Rider
398
+ *.sln.iml
399
+
400
+ .env
401
+ *.pem
402
+ *.base64
Dockerfile ADDED
@@ -0,0 +1,14 @@
1
+ # Use the official Python image from Docker Hub
2
+ FROM python:3.11
3
+
4
+ RUN useradd -m -u 1000 user
5
+ USER user
6
+ ENV PATH="/home/user/.local/bin:$PATH"
7
+
8
+ WORKDIR /app
9
+
10
+ COPY --chown=user ./requirements.txt requirements.txt
11
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
12
+
13
+ COPY --chown=user . /app
14
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
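The CMD line assumes a top-level app module exposing a FastAPI instance named app; that file is not visible in this 50-file view, so the following is only a minimal sketch of such an entry point, wired to the routers and event handlers added later in this commit (names and title are illustrative):

# app.py (hypothetical entry point implied by the Dockerfile's CMD "uvicorn app:app"; sketch only)
from fastapi import FastAPI

from api.events import register_events           # startup/shutdown hooks added below
from api.router import bot, health, role, topic  # routers added below

app = FastAPI(title="RAG API")  # title is an assumption

for module in (bot, health, role, topic):
    app.include_router(module.router)

register_events(app)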
alembic.ini ADDED
@@ -0,0 +1,117 @@
1
+ # A generic, single database configuration.
2
+
3
+ [alembic]
4
+ # path to migration scripts
5
+ # Use forward slashes (/) also on windows to provide an os agnostic path
6
+ script_location = alembic
7
+
8
+ # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
9
+ # Uncomment the line below if you want the files to be prepended with date and time
10
+ # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
11
+ # for all available tokens
12
+ # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
13
+
14
+ # sys.path path, will be prepended to sys.path if present.
15
+ # defaults to the current working directory.
16
+ prepend_sys_path = .
17
+
18
+ # timezone to use when rendering the date within the migration file
19
+ # as well as the filename.
20
+ # If specified, requires the python>=3.9 or backports.zoneinfo library.
21
+ # Any required deps can installed by adding `alembic[tz]` to the pip requirements
22
+ # string value is passed to ZoneInfo()
23
+ # leave blank for localtime
24
+ # timezone =
25
+
26
+ # max length of characters to apply to the "slug" field
27
+ # truncate_slug_length = 40
28
+
29
+ # set to 'true' to run the environment during
30
+ # the 'revision' command, regardless of autogenerate
31
+ # revision_environment = false
32
+
33
+ # set to 'true' to allow .pyc and .pyo files without
34
+ # a source .py file to be detected as revisions in the
35
+ # versions/ directory
36
+ # sourceless = false
37
+
38
+ # version location specification; This defaults
39
+ # to alembic/versions. When using multiple version
40
+ # directories, initial revisions must be specified with --version-path.
41
+ # The path separator used here should be the separator specified by "version_path_separator" below.
42
+ # version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
43
+
44
+ # version path separator; As mentioned above, this is the character used to split
45
+ # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
46
+ # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
47
+ # Valid values for version_path_separator are:
48
+ #
49
+ # version_path_separator = :
50
+ # version_path_separator = ;
51
+ # version_path_separator = space
52
+ # version_path_separator = newline
53
+ version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
54
+
55
+ # set to 'true' to search source files recursively
56
+ # in each "version_locations" directory
57
+ # new in Alembic version 1.10
58
+ # recursive_version_locations = false
59
+
60
+ # the output encoding used when revision files
61
+ # are written from script.py.mako
62
+ # output_encoding = utf-8
63
+
64
+ sqlalchemy.url = placeholder_url
65
+
66
+
67
+ [post_write_hooks]
68
+ # post_write_hooks defines scripts or Python functions that are run
69
+ # on newly generated revision scripts. See the documentation for further
70
+ # detail and examples
71
+
72
+ # format using "black" - use the console_scripts runner, against the "black" entrypoint
73
+ # hooks = black
74
+ # black.type = console_scripts
75
+ # black.entrypoint = black
76
+ # black.options = -l 79 REVISION_SCRIPT_FILENAME
77
+
78
+ # lint with attempts to fix using "ruff" - use the exec runner, execute a binary
79
+ # hooks = ruff
80
+ # ruff.type = exec
81
+ # ruff.executable = %(here)s/.venv/bin/ruff
82
+ # ruff.options = --fix REVISION_SCRIPT_FILENAME
83
+
84
+ # Logging configuration
85
+ [loggers]
86
+ keys = root,sqlalchemy,alembic
87
+
88
+ [handlers]
89
+ keys = console
90
+
91
+ [formatters]
92
+ keys = generic
93
+
94
+ [logger_root]
95
+ level = WARN
96
+ handlers = console
97
+ qualname =
98
+
99
+ [logger_sqlalchemy]
100
+ level = WARN
101
+ handlers =
102
+ qualname = sqlalchemy.engine
103
+
104
+ [logger_alembic]
105
+ level = INFO
106
+ handlers =
107
+ qualname = alembic
108
+
109
+ [handler_console]
110
+ class = StreamHandler
111
+ args = (sys.stderr,)
112
+ level = NOTSET
113
+ formatter = generic
114
+
115
+ [formatter_generic]
116
+ format = %(levelname)-5.5s [%(name)s] %(message)s
117
+ datefmt = %H:%M:%S
alembic/README ADDED
@@ -0,0 +1 @@
1
+ Generic single-database configuration.
alembic/env.py ADDED
@@ -0,0 +1,86 @@
1
+ from logging.config import fileConfig
2
+
3
+ from sqlalchemy import engine_from_config
4
+ from sqlalchemy import pool
5
+ from dotenv import load_dotenv
6
+ from alembic import context
7
+ from db.models import Base
8
+
9
+ import os
10
+
11
+ load_dotenv()
12
+
13
+ # this is the Alembic Config object, which provides
14
+ # access to the values within the .ini file in use.
15
+ config = context.config
16
+
17
+ # Interpret the config file for Python logging.
18
+ # This line sets up loggers basically.
19
+ if config.config_file_name is not None:
20
+ fileConfig(config.config_file_name)
21
+
22
+ config.set_main_option(
23
+ "sqlalchemy.url", os.getenv("DB_URI_SQL_ALCHEMY")
24
+ )
25
+ # add your model's MetaData object here
26
+ # for 'autogenerate' support
27
+ # from myapp import mymodel
28
+ # target_metadata = mymodel.Base.metadata
29
+ target_metadata = Base.metadata
30
+
31
+ # other values from the config, defined by the needs of env.py,
32
+ # can be acquired:
33
+ # my_important_option = config.get_main_option("my_important_option")
34
+ # ... etc.
35
+
36
+
37
+ def run_migrations_offline() -> None:
38
+ """Run migrations in 'offline' mode.
39
+
40
+ This configures the context with just a URL
41
+ and not an Engine, though an Engine is acceptable
42
+ here as well. By skipping the Engine creation
43
+ we don't even need a DBAPI to be available.
44
+
45
+ Calls to context.execute() here emit the given string to the
46
+ script output.
47
+
48
+ """
49
+ url = config.get_main_option("sqlalchemy.url")
50
+ context.configure(
51
+ url=url,
52
+ target_metadata=target_metadata,
53
+ literal_binds=True,
54
+ dialect_opts={"paramstyle": "named"},
55
+ )
56
+
57
+ with context.begin_transaction():
58
+ context.run_migrations()
59
+
60
+
61
+ def run_migrations_online() -> None:
62
+ """Run migrations in 'online' mode.
63
+
64
+ In this scenario we need to create an Engine
65
+ and associate a connection with the context.
66
+
67
+ """
68
+ connectable = engine_from_config(
69
+ config.get_section(config.config_ini_section, {}),
70
+ prefix="sqlalchemy.",
71
+ poolclass=pool.NullPool,
72
+ )
73
+
74
+ with connectable.connect() as connection:
75
+ context.configure(
76
+ connection=connection, target_metadata=target_metadata
77
+ )
78
+
79
+ with context.begin_transaction():
80
+ context.run_migrations()
81
+
82
+
83
+ if context.is_offline_mode():
84
+ run_migrations_offline()
85
+ else:
86
+ run_migrations_online()
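env.py ignores the placeholder sqlalchemy.url in alembic.ini and injects the real connection string from the DB_URI_SQL_ALCHEMY environment variable at run time. A small sketch, assuming that variable is set (for example via .env), of driving the same migrations from Python instead of the alembic CLI:

# migrate.py (sketch; not part of this commit)
from alembic import command
from alembic.config import Config

def upgrade_to_head(ini_path: str = "alembic.ini") -> None:
    cfg = Config(ini_path)        # picks up script_location and logging settings
    command.upgrade(cfg, "head")  # invokes env.py, which reads DB_URI_SQL_ALCHEMY

if __name__ == "__main__":
    upgrade_to_head()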
alembic/script.py.mako ADDED
@@ -0,0 +1,26 @@
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ ${imports if imports else ""}
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = ${repr(up_revision)}
16
+ down_revision: Union[str, None] = ${repr(down_revision)}
17
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
18
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
19
+
20
+
21
+ def upgrade() -> None:
22
+ ${upgrades if upgrades else "pass"}
23
+
24
+
25
+ def downgrade() -> None:
26
+ ${downgrades if downgrades else "pass"}
alembic/versions/404f8a028e0e_add_bot_name.py ADDED
@@ -0,0 +1,30 @@
1
+ """add bot_name
2
+
3
+ Revision ID: 404f8a028e0e
4
+ Revises: 98b1b4a0de39
5
+ Create Date: 2024-10-04 14:03:44.098762
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '404f8a028e0e'
16
+ down_revision: Union[str, None] = '98b1b4a0de39'
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.add_column('bot', sa.Column('bot_name', sa.String(length=200), nullable=False))
24
+ # ### end Alembic commands ###
25
+
26
+
27
+ def downgrade() -> None:
28
+ # ### commands auto generated by Alembic - please adjust! ###
29
+ op.drop_column('bot', 'bot_name')
30
+ # ### end Alembic commands ###
alembic/versions/426e52aa13aa_migration_description.py ADDED
@@ -0,0 +1,91 @@
1
+ """migration description
2
+
3
+ Revision ID: 426e52aa13aa
4
+ Revises:
5
+ Create Date: 2024-10-02 14:32:47.859996
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import mysql
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '426e52aa13aa'
16
+ down_revision: Union[str, None] = None
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.drop_table('Category')
24
+ op.drop_table('Role')
25
+ op.drop_table('Metadata')
26
+ op.drop_table('_prisma_migrations')
27
+ op.drop_index('email', table_name='User')
28
+ op.drop_table('User')
29
+ op.add_column('bot', sa.Column('user_id', sa.Integer(), nullable=True))
30
+ op.add_column('metadata', sa.Column('title', sa.String(length=100), nullable=True))
31
+ op.create_foreign_key(None, 'bot', 'user', ['user_id'], ['id'])
32
+ op.alter_column('session', 'id',
33
+ existing_type=mysql.CHAR(length=36),
34
+ type_=sa.String(length=36),
35
+ existing_nullable=False)
36
+ # ### end Alembic commands ###
37
+
38
+
39
+ def downgrade() -> None:
40
+ # ### commands auto generated by Alembic - please adjust! ###
41
+ op.alter_column('session', 'id',
42
+ existing_type=sa.String(length=36),
43
+ type_=mysql.CHAR(length=36),
44
+ existing_nullable=False)
45
+ op.drop_constraint(None, 'bot', type_='foreignkey')
46
+ op.drop_column('bot', 'user_id')
47
+ op.create_table('User',
48
+ sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False),
49
+ sa.Column('name', mysql.VARCHAR(length=50), nullable=False),
50
+ sa.Column('email', mysql.VARCHAR(length=100), nullable=False),
51
+ sa.Column('password_hash', mysql.VARCHAR(length=100), nullable=False),
52
+ sa.Column('created_at', mysql.DATETIME(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
53
+ sa.Column('updated_at', mysql.DATETIME(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
54
+ sa.PrimaryKeyConstraint('id')
55
+ )
56
+ op.create_index('email', 'User', ['email'], unique=True)
57
+ op.create_table('_prisma_migrations',
58
+ sa.Column('id', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=36), nullable=False),
59
+ sa.Column('checksum', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=64), nullable=False),
60
+ sa.Column('finished_at', mysql.DATETIME(fsp=3), nullable=True),
61
+ sa.Column('migration_name', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=255), nullable=False),
62
+ sa.Column('logs', mysql.TEXT(collation='utf8mb4_unicode_ci'), nullable=True),
63
+ sa.Column('rolled_back_at', mysql.DATETIME(fsp=3), nullable=True),
64
+ sa.Column('started_at', mysql.DATETIME(fsp=3), server_default=sa.text('CURRENT_TIMESTAMP(3)'), nullable=False),
65
+ sa.Column('applied_steps_count', mysql.INTEGER(unsigned=True), server_default=sa.text("'0'"), autoincrement=False, nullable=False),
66
+ sa.PrimaryKeyConstraint('id')
67
+ )
68
+ op.create_table('Metadata',
69
+ sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False),
70
+ sa.Column('title', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=191), nullable=False),
71
+ sa.Column('category', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=191), nullable=False),
72
+ sa.Column('author', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=191), nullable=False),
73
+ sa.Column('year', mysql.INTEGER(), autoincrement=False, nullable=False),
74
+ sa.Column('publisher', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=191), nullable=False),
75
+ sa.Column('createdAt', mysql.DATETIME(fsp=3), server_default=sa.text('CURRENT_TIMESTAMP(3)'), nullable=False),
76
+ sa.Column('updatedAt', mysql.DATETIME(fsp=3), nullable=False),
77
+ sa.PrimaryKeyConstraint('id')
78
+ )
79
+ op.create_table('Role',
80
+ sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False),
81
+ sa.Column('role_name', mysql.VARCHAR(length=100), nullable=False),
82
+ sa.Column('description', mysql.VARCHAR(length=100), nullable=True),
83
+ sa.PrimaryKeyConstraint('id')
84
+ )
85
+ op.create_table('Category',
86
+ sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False),
87
+ sa.Column('category', mysql.VARCHAR(length=100), nullable=True),
88
+ sa.Column('created_at', mysql.DATETIME(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
89
+ sa.PrimaryKeyConstraint('id')
90
+ )
91
+ # ### end Alembic commands ###
alembic/versions/98b1b4a0de39_add_username.py ADDED
@@ -0,0 +1,32 @@
1
+ """add username
2
+
3
+ Revision ID: 98b1b4a0de39
4
+ Revises: c818e5b84075
5
+ Create Date: 2024-10-03 14:27:43.877725
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '98b1b4a0de39'
16
+ down_revision: Union[str, None] = 'c818e5b84075'
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.add_column('user', sa.Column('username', sa.String(length=100), nullable=False))
24
+ op.create_unique_constraint(None, 'user', ['username'])
25
+ # ### end Alembic commands ###
26
+
27
+
28
+ def downgrade() -> None:
29
+ # ### commands auto generated by Alembic - please adjust! ###
30
+ op.drop_constraint(None, 'user', type_='unique')
31
+ op.drop_column('user', 'username')
32
+ # ### end Alembic commands ###
alembic/versions/c818e5b84075_add_role_id.py ADDED
@@ -0,0 +1,108 @@
1
+ """add role id
2
+
3
+ Revision ID: c818e5b84075
4
+ Revises: 426e52aa13aa
5
+ Create Date: 2024-10-03 09:35:53.882054
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import mysql
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = 'c818e5b84075'
16
+ down_revision: Union[str, None] = '426e52aa13aa'
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.drop_table('_prisma_migrations')
24
+ op.alter_column('category', 'category',
25
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
26
+ type_=sa.String(length=200),
27
+ existing_nullable=True)
28
+ op.alter_column('feedback', 'comment',
29
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
30
+ type_=sa.String(length=1000),
31
+ existing_nullable=True)
32
+ op.alter_column('message', 'goal',
33
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
34
+ type_=sa.String(length=200),
35
+ existing_nullable=True)
36
+ op.alter_column('metadata', 'title',
37
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
38
+ type_=sa.String(length=200),
39
+ existing_nullable=True)
40
+ op.alter_column('metadata', 'author',
41
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
42
+ type_=sa.String(length=200),
43
+ existing_nullable=True)
44
+ op.alter_column('role', 'role_name',
45
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
46
+ type_=sa.String(length=200),
47
+ existing_nullable=False)
48
+ op.alter_column('role', 'description',
49
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
50
+ type_=sa.String(length=200),
51
+ existing_nullable=True)
52
+ op.add_column('user', sa.Column('role_id', sa.Integer(), nullable=True))
53
+ op.alter_column('user', 'name',
54
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=50),
55
+ type_=sa.String(length=100),
56
+ existing_nullable=False)
57
+ op.create_foreign_key(None, 'user', 'role', ['role_id'], ['id'])
58
+ # ### end Alembic commands ###
59
+
60
+
61
+ def downgrade() -> None:
62
+ # ### commands auto generated by Alembic - please adjust! ###
63
+ op.drop_constraint(None, 'user', type_='foreignkey')
64
+ op.alter_column('user', 'name',
65
+ existing_type=sa.String(length=100),
66
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=50),
67
+ existing_nullable=False)
68
+ op.drop_column('user', 'role_id')
69
+ op.alter_column('role', 'description',
70
+ existing_type=sa.String(length=200),
71
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
72
+ existing_nullable=True)
73
+ op.alter_column('role', 'role_name',
74
+ existing_type=sa.String(length=200),
75
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
76
+ existing_nullable=False)
77
+ op.alter_column('metadata', 'author',
78
+ existing_type=sa.String(length=200),
79
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
80
+ existing_nullable=True)
81
+ op.alter_column('metadata', 'title',
82
+ existing_type=sa.String(length=200),
83
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
84
+ existing_nullable=True)
85
+ op.alter_column('message', 'goal',
86
+ existing_type=sa.String(length=200),
87
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
88
+ existing_nullable=True)
89
+ op.alter_column('feedback', 'comment',
90
+ existing_type=sa.String(length=1000),
91
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
92
+ existing_nullable=True)
93
+ op.alter_column('category', 'category',
94
+ existing_type=sa.String(length=200),
95
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
96
+ existing_nullable=True)
97
+ op.create_table('_prisma_migrations',
98
+ sa.Column('id', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=36), nullable=False),
99
+ sa.Column('checksum', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=64), nullable=False),
100
+ sa.Column('finished_at', mysql.DATETIME(fsp=3), nullable=True),
101
+ sa.Column('migration_name', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=255), nullable=False),
102
+ sa.Column('logs', mysql.TEXT(collation='utf8mb4_unicode_ci'), nullable=True),
103
+ sa.Column('rolled_back_at', mysql.DATETIME(fsp=3), nullable=True),
104
+ sa.Column('started_at', mysql.DATETIME(fsp=3), server_default=sa.text('CURRENT_TIMESTAMP(3)'), nullable=False),
105
+ sa.Column('applied_steps_count', mysql.INTEGER(unsigned=True), server_default=sa.text("'0'"), autoincrement=False, nullable=False),
106
+ sa.PrimaryKeyConstraint('id')
107
+ )
108
+ # ### end Alembic commands ###
api/__init__.py ADDED
File without changes
api/auth.py ADDED
@@ -0,0 +1,68 @@
1
+ from fastapi import APIRouter, Depends, HTTPException, Request
2
+ from fastapi.responses import RedirectResponse
3
+ from pydantic import BaseModel
4
+ from sqlalchemy.orm import Session
5
+ from db.models import User
6
+ from starlette import status
7
+ from datetime import timedelta, datetime, timezone
8
+ from db.database import get_db
9
+ from passlib.context import CryptContext
10
+ from typing import Annotated
11
+ from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
12
+ from jose import jwt, JWTError
13
+
14
+
15
+ # Define OAuth2Password flow for Swagger
16
+
17
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="login")
18
+
19
+ # Custom OAuth2 request form to accept email, username, password, and role_id
20
+
21
+ router = APIRouter(prefix="/auth", tags=["auth"])
22
+
23
+ SECRET_KEY = "557673909e7ad0c6c702e18550f6432754a55205d209c4c9fb27966bfb844555"
24
+ ALGORITHM = "HS256"
25
+
26
+ bcrypt_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
27
+
28
+ # Database dependency
29
+ db_dependency = Annotated[Session, Depends(get_db)]
30
+
31
+
32
+ def authenticate_user(email: str, password: str, db):
33
+ user = db.query(User).filter(User.email == email).first()
34
+ if not user:
35
+ return False
36
+
37
+ if not bcrypt_context.verify(password, user.hashed_password):
38
+ return False
39
+ return user
40
+
41
+ def create_access_token(
42
+ username: str, user_id: int, role_id: int, expires_delta: timedelta, email: str
43
+ ):
44
+ encode = {"sub": username, "id": user_id, "role_id": role_id, "email": email}
45
+ expires = datetime.now(timezone.utc) + expires_delta
46
+ encode.update({"exp": expires})
47
+ return jwt.encode(encode, SECRET_KEY, algorithm=ALGORITHM)
48
+
49
+ async def get_current_user(token: Annotated[str, Depends(oauth2_scheme)]):
50
+ try:
51
+ payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
52
+ username: str = payload.get('sub')
53
+ user_id: int = payload.get('id')
54
+ role_id: int = payload.get('role_id')
55
+ email: str = payload.get('email')
56
+
57
+ if username is None or user_id is None:
58
+ raise HTTPException(
59
+ status_code=status.HTTP_401_UNAUTHORIZED,
60
+ detail="Could not validate user.",
61
+ )
62
+
63
+ return {"username": username, "id" : user_id, "role_id":role_id, "email":email}
64
+
65
+ except JWTError:
66
+ raise HTTPException(
67
+ status_code=status.HTTP_401_UNAUTHORIZED, detail="Could not validate user."
68
+ )
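create_access_token packs the username, user id, role id, and email into the JWT payload, and get_current_user expects exactly those claims back. A short round-trip sketch against the helpers above (the credentials are illustrative; run with the api package importable):

# sketch; not part of this commit
from datetime import timedelta
from jose import jwt
from api.auth import ALGORITHM, SECRET_KEY, create_access_token

token = create_access_token(
    username="alice", user_id=1, role_id=1,
    expires_delta=timedelta(minutes=20), email="alice@example.com",
)
claims = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
print(claims["sub"], claims["id"], claims["role_id"], claims["email"])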
api/events.py ADDED
@@ -0,0 +1,30 @@
1
+ from fastapi import FastAPI
2
+ from db.models import Base
3
+ from db.database import engine
4
+ from api.router.topic import db_conn
5
+ from llama_index.core import set_global_handler
6
+ from dotenv import load_dotenv
7
+ import os
8
+
9
+
10
+ load_dotenv()
11
+
12
+
13
+ async def startup() -> None:
14
+ Base.metadata.create_all(engine)
15
+ print("table added")
16
+ await db_conn.connect()
17
+ os.environ["LANGFUSE_SECRET_KEY"] = os.getenv("LANGFUSE_SECRET_KEY")
18
+ os.environ["LANGFUSE_PUBLIC_KEY"] = os.getenv("LANGFUSE_PUBLIC_KEY")
19
+ os.environ["LANGFUSE_HOST"] = os.getenv("LANGFUSE_HOST")
20
+ set_global_handler("langfuse")
21
+
22
+
23
+ async def shutdown() -> None:
24
+ # await db_conn.disconnect()
25
+ pass
26
+
27
+ def register_events(app: FastAPI) -> FastAPI:
28
+ app.add_event_handler("startup", startup)
29
+ app.add_event_handler("shutdown", shutdown)
30
+ return app
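startup() assumes the Langfuse credentials are already present in the process environment (loaded here via load_dotenv), and alembic/env.py additionally needs DB_URI_SQL_ALCHEMY. A small fail-fast sketch for checking those assumptions before the app boots; the variable list is inferred from this commit, not exhaustive:

# sketch; not part of this commit
import os

REQUIRED_VARS = (
    "LANGFUSE_SECRET_KEY",
    "LANGFUSE_PUBLIC_KEY",
    "LANGFUSE_HOST",
    "DB_URI_SQL_ALCHEMY",
)

def check_env() -> None:
    missing = [name for name in REQUIRED_VARS if not os.getenv(name)]
    if missing:
        raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")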
api/function.py ADDED
@@ -0,0 +1,249 @@
1
+ from script.vector_db import IndexManager
2
+ from script.document_uploader import Uploader
3
+ from db.save_data import InsertDatabase
4
+ from db.get_data import GetDatabase
5
+ from db.delete_data import DeleteDatabase
6
+ from db.update_data import UpdateDatabase
7
+
8
+ from typing import Any
9
+ from fastapi import UploadFile
10
+ from fastapi import HTTPException
11
+
12
+ from service.dto import ChatMessage
13
+ from core.chat.engine import Engine
14
+ from core.chat.chatstore import ChatStore
15
+ from core.parser import clean_text, update_response, renumber_sources, seperate_to_list
16
+ from llama_index.core.llms import MessageRole
17
+ from service.dto import BotResponseStreaming
18
+ from service.aws_loader import Loader
19
+
20
+
21
+ from typing import List
22
+ import logging
23
+ import re
24
+
25
+
26
+ # Configure logging
27
+ logging.basicConfig(level=logging.INFO)
28
+
29
+
30
+ async def data_ingestion(db_conn, category_id, reference, file: UploadFile) -> Any:
31
+ try:
32
+
33
+ # insert_database = InsertDatabase(db_conn)
34
+
35
+ # Insert data into the database
36
+ # await insert_database.insert_data(reference, category_id)
37
+
38
+ # Upload to AWS
39
+ file_name = f"{reference['title']}"
40
+ aws_loader = Loader()
41
+
42
+ file_obj = file
43
+ aws_loader.upload_to_s3(file_obj, file_name)
44
+
45
+ uploader = Uploader(reference, file)
46
+ print("uploader : ", uploader)
47
+
48
+ nodes_with_metadata = await uploader.process_documents()
49
+
50
+ # response = json.dumps({"status": "success", "message": "Vector Index loaded successfully."})
51
+
52
+ # Build indexes using IndexManager
53
+ index = IndexManager()
54
+ response = index.build_indexes(nodes_with_metadata)
55
+
56
+ return response
57
+
58
+ except Exception as e:
59
+ # Log the error and raise HTTPException for FastAPI
60
+ logging.error(f"An error occurred in data ingestion: {e}")
61
+ raise HTTPException(
62
+ status_code=500,
63
+ detail="An internal server error occurred in data ingestion.",
64
+ )
65
+
66
+
67
+ async def get_data(db_conn, title="", fetch_all_data=True):
68
+ get_database = GetDatabase(db_conn)
69
+ print(get_database)
70
+ try:
71
+ if fetch_all_data:
72
+ results = await get_database.get_all_data()
73
+ print(results)
74
+ logging.info("Database fetched all data")
75
+ return results
76
+ else:
77
+ results = await get_database.get_data(title)
78
+ logging.info("Database fetched one data")
79
+ return results
80
+
81
+ except Exception as e:
82
+ # Log the error and raise HTTPException for FastAPI
83
+ logging.error(f"An error occurred in get data: {e}")
84
+ raise HTTPException(
85
+ status_code=500, detail="An internal server error occurred in get data."
86
+ )
87
+
88
+
89
+ async def update_data(id: int, reference, db_conn):
90
+ update_database = UpdateDatabase(db_conn)
91
+ try:
92
+ reference = reference.model_dump()
93
+ print(reference)
94
+ reference.update({"id": id})
95
+ print(reference)
96
+ await update_database.update_record(reference)
97
+ response = {"status": "Update Success"}
98
+ return response
99
+ except Exception as e:
100
+ # Log the error and raise HTTPException for FastAPI
101
+ logging.error(f"An error occurred in update data: {e}")
102
+ raise HTTPException(
103
+ status_code=500, detail="An internal server error occurred in update data."
104
+ )
105
+
106
+
107
+ async def delete_data(id: int, db_conn):
108
+ delete_database = DeleteDatabase(db_conn)
109
+ try:
110
+ params = {"id": id}
111
+ await delete_database.delete_record(params)
112
+ response = {"status": "Delete Success"}
113
+ return response
114
+ except Exception as e:
115
+ # Log the error and raise HTTPException for FastAPI
116
+ logging.error(f"An error occurred in delete data: {e}")
117
+ raise HTTPException(
118
+ status_code=500, detail="An internal server error occurred in delete data."
119
+ )
120
+
121
+
122
+ def generate_completion_non_streaming(
123
+ session_id, user_request, titles: List = None, type="general"
124
+ ):
125
+ try:
126
+ engine = Engine()
127
+ index_manager = IndexManager()
128
+ chatstore = ChatStore()
129
+
130
+ # Load existing indexes
131
+ index = index_manager.load_existing_indexes()
132
+
133
+ if type == "general":
134
+ # Retrieve the chat engine with the loaded index
135
+ chat_engine = engine.get_chat_engine(session_id, index)
136
+ else:
137
+ # Retrieve the chat engine with the loaded index
138
+ chat_engine = engine.get_chat_engine(session_id, index, titles, type)
139
+
140
+ # Generate completion response
141
+ response = chat_engine.chat(user_request)
142
+
143
+ sources = response.sources
144
+
145
+ number_reference = list(set(re.findall(r"\[(\d+)\]", str(response))))
146
+ number_reference_sorted = sorted(number_reference)
147
+
148
+ contents = []
149
+ metadata_collection = []
150
+ scores = []
151
+
152
+ if number_reference_sorted:
153
+ for number in number_reference_sorted:
154
+ # Convert number to an integer so it can be used as an index
155
+ number = int(number)
156
+
157
+ # Make sure sources is not empty and has the required elements
158
+ if sources and len(sources) > 0:
159
+ node = dict(sources[0])["raw_output"].source_nodes
160
+
161
+ # Make sure number is a valid index
162
+ if 0 <= number - 1 < len(node):
163
+
164
+ content = clean_text(node[number - 1].node.get_text())
165
+ contents.append(content)
166
+
167
+ metadata = dict(node[number - 1].node.metadata)
168
+ metadata_collection.append(metadata)
169
+
170
+ score = node[number - 1].score
171
+ scores.append(score)
172
+ else:
173
+ print(f"Invalid reference number: {number}")
174
+ else:
175
+ print("No sources available")
176
+ else:
177
+ print("There are no references")
178
+
179
+ response = update_response(str(response))
180
+ contents = renumber_sources(contents)
181
+
182
+ # Check the lengths of content and metadata
183
+ num_content = len(contents)
184
+ num_metadata = len(metadata_collection)
185
+
186
+ # Add content to metadata
187
+ for i in range(min(num_content, num_metadata)):
188
+ metadata_collection[i]["content"] = re.sub(r"source \d+\:", "", contents[i])
189
+
190
+ message = ChatMessage(
191
+ role=MessageRole.ASSISTANT, content=response, metadata=metadata_collection
192
+ )
193
+
194
+ chatstore.delete_last_message(session_id)
195
+ chatstore.add_message(session_id, message)
196
+ chatstore.clean_message(session_id)
197
+
198
+ return str(response), metadata_collection, scores
199
+ except Exception as e:
200
+ # Log the error and raise HTTPException for FastAPI
201
+ logging.error(f"An error occurred in generate text: {e}")
202
+ raise HTTPException(
203
+ status_code=500,
204
+ detail="An internal server error occurred in generate text.",
205
+ )
206
+
207
+
208
+ async def generate_streaming_completion(user_request, chat_engine, session_id):
209
+ try:
210
+ engine = Engine()
211
+ index_manager = IndexManager()
212
+
213
+ # Load existing indexes
214
+ index = index_manager.load_existing_indexes()
215
+
216
+ # Retrieve the chat engine with the loaded index
217
+ chat_engine = engine.get_chat_engine(index, session_id)
218
+ # Generate completion response
219
+ response = chat_engine.stream_chat(user_request)
220
+
221
+ completed_response = ""
222
+
223
+ for gen in response.response_gen:
224
+ completed_response += gen # Concatenate the new string
225
+ yield BotResponseStreaming(
226
+ content=gen, completed_content=completed_response
227
+ )
228
+
229
+ nodes = response.source_nodes
230
+ for node in nodes:
231
+ reference = str(clean_text(node.node.get_text()))
232
+ metadata = dict(node.node.metadata)
233
+ score = float(node.score)
234
+ yield BotResponseStreaming(
235
+ completed_content=completed_response,
236
+ reference=reference,
237
+ metadata=metadata,
238
+ score=score,
239
+ )
240
+ except Exception as e:
241
+ yield {"error": str(e)}
242
+
243
+ except Exception as e:
244
+ # Log the error and raise HTTPException for FastAPI
245
+ logging.error(f"An error occurred in generate text: {e}")
246
+ raise HTTPException(
247
+ status_code=500,
248
+ detail="An internal server error occurred in generate text.",
249
+ )
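generate_streaming_completion is an async generator that first yields partial text in content and then one chunk per source node carrying reference, metadata, and score. As currently written it rebuilds the chat engine internally, so the chat_engine argument is effectively unused; the consumer sketch below therefore passes a placeholder for it (an assumption based on the function body above):

# sketch; not part of this commit
import asyncio
from api.function import generate_streaming_completion

async def stream_to_stdout(session_id: str, prompt: str) -> None:
    async for chunk in generate_streaming_completion(prompt, None, session_id):
        # error cases are yielded as plain dicts, so guard with getattr
        text = getattr(chunk, "content", None)
        if text:
            print(text, end="", flush=True)

# asyncio.run(stream_to_stdout("existing-session-id", "What does the book say about X?"))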
api/router/__init__.py ADDED
File without changes
api/router/bot.py ADDED
@@ -0,0 +1,299 @@
1
+ from fastapi import APIRouter, HTTPException, Depends
2
+ from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
3
+ from pydantic import BaseModel
4
+
5
+ from core.chat.chatstore import ChatStore
6
+
7
+ from db.database import get_db
8
+ from db.models import Bot_Meta, Bot, Metadata
9
+ from db.models import Session as SessionModel
10
+
11
+ from sqlalchemy.orm import selectinload
12
+ from sqlalchemy import select
13
+
14
+ from api.function import (
15
+ generate_streaming_completion,
16
+ generate_completion_non_streaming,
17
+ )
18
+ from api.router.user import user_dependency
19
+ from sse_starlette.sse import EventSourceResponse
20
+ from utils.utils import generate_uuid
21
+ from typing import Annotated, List, Optional
22
+ from sqlalchemy.orm import Session
23
+ from sqlalchemy.exc import SQLAlchemyError
24
+ from sqlalchemy.exc import NoResultFound
25
+
26
+ from langfuse.llama_index import LlamaIndexCallbackHandler
27
+ from sqlalchemy import and_
28
+
29
+
30
+ router = APIRouter(tags=["Bot"])
31
+
32
+ db_dependency = Annotated[Session, Depends(get_db)]
33
+
34
+
35
+ def get_chat_store():
36
+ return ChatStore()
37
+
38
+
39
+ @router.post("/bot_general/new")
40
+ async def create_session_general():
41
+ session_id = generate_uuid()
42
+ return {"session_id": session_id}
43
+
44
+
45
+ @router.post("/bot")
46
+ async def create_bot(
47
+ user: user_dependency,
48
+ db: db_dependency,
49
+ bot_request: BotCreateRequest,
50
+ ):
51
+
52
+ if user is None:
53
+ raise HTTPException(status_code=401, detail="Authentication Failed")
54
+
55
+ # Create a new bot entry
56
+ try:
57
+ # Create a new bot entry
58
+ new_bot = Bot(
59
+ user_id=user.get("id"), bot_name=bot_request.bot_name
60
+ ) # Assuming user has an 'id' attribute
61
+
62
+ # Add the new bot to the database
63
+ db.add(new_bot)
64
+ db.commit() # Commit the transaction
65
+ db.refresh(new_bot) # Optional: Refresh the instance with the database state
66
+
67
+ return {"status": "success", "bot_id": new_bot.id}
68
+
69
+ except SQLAlchemyError as e:
70
+ db.rollback() # Roll back the transaction in case of an error
71
+ raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")
72
+ except Exception as e:
73
+ raise HTTPException(
74
+ status_code=500, detail=f"An unexpected error occurred: {str(e)}"
75
+ )
76
+
77
+ class BotMetaCreate(BaseModel):
78
+ metadata_id: List[int] # List of integers for metadata_id
79
+
80
+ @router.post("/meta/{bot_id}")
81
+ async def create_bot_specific(
82
+ user: user_dependency,
83
+ db: db_dependency,
84
+ bot_id: int,
85
+ metadata_id: List[Optional[int]], # Use the Pydantic model
86
+ ):
87
+ if user is None:
88
+ raise HTTPException(status_code=401, detail="Authentication Failed")
89
+
90
+ try:
91
+ # metadata_id = bot_meta_data.metadata_id
92
+ print("metadata id = ", metadata_id)
93
+ # Create BotMeta instances for each metadata_id
94
+ bot_meta_entries = [
95
+ Bot_Meta(bot_id=bot_id, metadata_id=mid) for mid in metadata_id
96
+ ]
97
+
98
+ print(bot_meta_entries)
99
+
100
+ # Insert all entries into the database
101
+ db.add_all(bot_meta_entries)
102
+ db.commit() # Commit the transaction
103
+
104
+ except SQLAlchemyError as e:
105
+ raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")
106
+ except Exception as e:
107
+ raise HTTPException(
108
+ status_code=500, detail=f"An unexpected error occurred: {str(e)}"
109
+ )
110
+
111
+ return {"status": "success", "bot_meta": [entry.id for entry in bot_meta_entries]}
112
+
113
+
114
+ @router.post("/session/{bot_id}/new")
115
+ async def create_new_session(
116
+ user: user_dependency, db: db_dependency, bot_id: int
117
+ ):
118
+ # Check if user is authenticated
119
+ if user is None:
120
+ raise HTTPException(status_code=401, detail="Authentication Failed")
121
+
122
+ print(user.get('id'))
123
+ user_id = user.get('id')
124
+ # Ensure the bot belongs to the user
125
+ bot_query = select(Bot).where(Bot.id == bot_id, Bot.user_id == user_id)
126
+
127
+ try:
128
+ bot = db.execute(bot_query).scalar_one()
129
+
130
+ except NoResultFound:
131
+ raise HTTPException(
132
+ status_code=404, detail="Bot not found or unauthorized access."
133
+ )
134
+
135
+ # Generate a new session ID (UUID)
136
+ try :
137
+ session_id = generate_uuid()
138
+
139
+ # Create the new session
140
+ new_session = SessionModel(
141
+ id=session_id,
142
+ user_id=user.get('id'),
143
+ bot_id=bot_id,
144
+ )
145
+
146
+ db.add(new_session)
147
+ db.commit() # Commit the new session to the database
148
+
149
+ return {
150
+ "session_id": session_id,
151
+ }
152
+
153
+ except Exception as e:
154
+ raise HTTPException(
155
+ status_code=500, detail=f"An unexpected error occurred while retrieving the session id: {str(e)}"
156
+ )
157
+
158
+ @router.get("/bot/{session_id}")
159
+ async def get_session_id(
160
+ user: user_dependency,
161
+ session_id: str,
162
+ chat_store: ChatStore = Depends(get_chat_store),
163
+ ):
164
+ if user is None:
165
+ raise HTTPException(status_code=401, detail="Authentication Failed")
166
+
167
+ chat_history = chat_store.get_messages(session_id)
168
+
169
+ if not chat_history:
170
+ raise HTTPException(status_code=404, detail="Session not found or empty.")
171
+
172
+ return chat_history
173
+
174
+
175
+ @router.get("/bot/all/{bot_id}")
176
+ async def get_all_session_ids(user: user_dependency, db: db_dependency, bot_id: int):
177
+ if user is None:
178
+ raise HTTPException(status_code=401, detail="Authentication Failed")
179
+
180
+ try:
181
+ # Query the session IDs based on the user ID
182
+ print(user.get('id'))
183
+ print(bot_id)
184
+ query = (
185
+ select(SessionModel.id)
186
+ .where(SessionModel.user_id == user.get('id'), SessionModel.bot_id == bot_id)
187
+ )
188
+
189
+ result = db.execute(query)
190
+ session_ids = result.scalars().all()
191
+
192
+ # Convert list of tuples to a simple list
193
+ print(session_ids)
194
+
195
+ return {"session_ids" : session_ids}
196
+
197
+ except Exception as e:
198
+ # Log the error and raise HTTPException for FastAPI
199
+ print(f"An error occurred while fetching session IDs: {e}")
200
+ raise HTTPException(status_code=400, detail="Error retrieving session IDs")
201
+
202
+
203
+ @router.post("/bot/{session_id}")
204
+ async def bot_generator_general(
205
+ session_id: str, user_prompt_request: UserPromptRequest
206
+ ):
207
+
208
+ langfuse_callback_handler = LlamaIndexCallbackHandler()
209
+ langfuse_callback_handler.set_trace_params(user_id="guest", session_id=session_id)
210
+
211
+ if user_prompt_request.streaming:
212
+ return EventSourceResponse(
213
+ generate_streaming_completion(
214
+ session_id,
215
+ user_prompt_request.prompt,
216
+ )
217
+ )
218
+ else:
219
+ response, metadata, scores = generate_completion_non_streaming(
220
+ session_id,
221
+ user_prompt_request.prompt,
222
+ )
223
+
224
+ return BotResponse(
225
+ content=response,
226
+ metadata=metadata,
227
+ scores=scores,
228
+ )
229
+
230
+
231
+ @router.post("/bot/{bot_id}/{session_id}")
232
+ async def bot_generator_specific(
233
+ user: user_dependency,
234
+ db: db_dependency,
235
+ bot_id: int,
236
+ session_id: str,
237
+ user_prompt_request: UserPromptRequest,
238
+ ):
239
+ if user is None:
240
+ raise HTTPException(status_code=401, detail="Authentication Failed")
241
+
242
+ langfuse_callback_handler = LlamaIndexCallbackHandler()
243
+ langfuse_callback_handler.set_trace_params(
244
+ user_id=user.get('username'), session_id=session_id
245
+ )
246
+
247
+ # Query to retrieve the titles
248
+ try:
249
+ query = (
250
+ select(Metadata.title)
251
+ .join(Bot_Meta, Metadata.id == Bot_Meta.metadata_id)
252
+ .join(SessionModel, Bot_Meta.bot_id == bot_id)
253
+ .where(SessionModel.user_id == user.get('id'), SessionModel.id == session_id)
254
+ )
255
+
256
+ result = db.execute(query)
257
+ titles = result.scalars().all()
258
+ print(titles)
259
+
260
+ except SQLAlchemyError as e:
261
+ raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")
262
+ except Exception as e:
263
+ raise HTTPException(
264
+ status_code=500, detail=f"An unexpected error occurred: {str(e)}"
265
+ )
266
+
267
+ if user_prompt_request.streaming:
268
+ return EventSourceResponse(
269
+ generate_streaming_completion(
270
+ session_id,
271
+ user_prompt_request.prompt,
272
+ )
273
+ )
274
+ else:
275
+ response, metadata, scores = generate_completion_non_streaming(
276
+ session_id, user_prompt_request.prompt, titles, type="specific"
277
+ )
278
+
279
+ return BotResponse(
280
+ content=response,
281
+ metadata=metadata,
282
+ scores=scores,
283
+ )
284
+
285
+
286
+ @router.delete("/bot/{session_id}")
287
+ async def delete_bot(session_id: str, chat_store: ChatStore = Depends(get_chat_store)):
288
+ try:
289
+ chat_store.delete_messages(session_id)
290
+ return {"info": f"Delete {session_id} successful"}
291
+ except Exception as e:
292
+ # Log the error and raise HTTPException for FastAPI
293
+ print(f"An error occurred while deleting messages: {e}")
294
+ raise HTTPException(status_code=400, detail="An error occurred while deleting messages")
295
+
296
+
297
+ # @router.get("/bot/{bot_id}")
298
+ # async def get_favourite_data(user: user_dependency):
299
+ # pass
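The general bot flow is: POST /bot_general/new to obtain a session id, then POST /bot/{session_id} with a prompt. A client-side sketch using httpx; the base URL is illustrative (port 7860 matches the Dockerfile CMD) and the request fields mirror how UserPromptRequest is read in the handlers above:

# sketch; not part of this commit
import httpx

BASE_URL = "http://localhost:7860"

def ask_general_bot(prompt: str) -> dict:
    with httpx.Client(base_url=BASE_URL, timeout=60) as client:
        session_id = client.post("/bot_general/new").json()["session_id"]
        payload = {"prompt": prompt, "streaming": False}
        return client.post(f"/bot/{session_id}", json=payload).json()

# print(ask_general_bot("Give a short overview of the indexed references."))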
api/router/health.py ADDED
@@ -0,0 +1,10 @@
1
+ from fastapi import Request
2
+ from fastapi.responses import JSONResponse
3
+ from fastapi.routing import APIRouter
4
+
5
+ router = APIRouter(tags=["Health"])
6
+
7
+
8
+ @router.get("/_health")
9
+ async def health(request: Request):
10
+ return JSONResponse(dict(status="OK"), status_code=200)
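A quick check of the health route with FastAPI's TestClient (sketch; not part of this commit):

from fastapi import FastAPI
from fastapi.testclient import TestClient
from api.router.health import router

app = FastAPI()
app.include_router(router)

client = TestClient(app)
assert client.get("/_health").status_code == 200
assert client.get("/_health").json() == {"status": "OK"}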
api/router/role.py ADDED
@@ -0,0 +1,20 @@
1
+ from fastapi import APIRouter
2
+
3
+
4
+ router = APIRouter(tags=["Roles"])
5
+
6
+ @router.get("/roles")
7
+ async def get_data_roles():
8
+ pass
9
+
10
+
11
+ @router.post("/roles")
12
+ async def add_data_roles():
13
+ pass
14
+ @router.put("/roles/{id}")
15
+ async def update_data_roles():
16
+ pass
17
+
18
+ @router.delete("/roles/{id}")
19
+ async def remove_data_roles():
20
+ pass
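All four role handlers are still stubs. A possible shape for the read endpoint, following the db_dependency pattern used by the other routers and assuming db.models defines a Role model with id, role_name, and description (which the migrations above suggest); this is a sketch, not part of the commit:

from typing import Annotated
from fastapi import Depends
from sqlalchemy import select
from sqlalchemy.orm import Session
from db.database import get_db
from db.models import Role

db_dependency = Annotated[Session, Depends(get_db)]

@router.get("/roles/all")
async def get_all_roles(db: db_dependency):
    roles = db.execute(select(Role)).scalars().all()
    return [{"id": r.id, "role_name": r.role_name, "description": r.description} for r in roles]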
api/router/topic.py ADDED
@@ -0,0 +1,178 @@
1
+ from api.function import data_ingestion, get_data, delete_data, update_data
2
+ from api.auth import get_current_user
3
+ from api.router.user import user_dependency
4
+ from fastapi import Form, APIRouter, File, UploadFile, HTTPException, Depends
5
+
6
+ from db.repository import get_db_conn
7
+ from db.get_data import GetDatabase
8
+ from db.models import Category, Metadata
9
+ from db.database import get_db
10
+
11
+ from langfuse.llama_index import LlamaIndexCallbackHandler
12
+
13
+ from config import MYSQL_CONFIG
14
+ from script.vector_db import IndexManager
15
+ from service.dto import MetadataRequest
16
+ from sqlalchemy.orm import Session
17
+ from sqlalchemy.future import select
18
+ from sqlalchemy.exc import SQLAlchemyError
19
+
20
+ from typing import Annotated
21
+
22
+ router = APIRouter(tags=["Topics"])
23
+
24
+ db_conn = get_db_conn(MYSQL_CONFIG)
25
+ get_database = GetDatabase(db_conn)
26
+ index_manager = IndexManager()
27
+ db_dependency = Annotated[Session, Depends(get_db)]
28
+
29
+
30
+ @router.post("/topic")
31
+ async def upload_file(
32
+ user: user_dependency,
33
+ db: db_dependency,
34
+ title: str = Form(...),
35
+ author: str = Form(...),
36
+ category_id: int = Form(...),
37
+ year: int = Form(...),
38
+ publisher: str = Form(...),
39
+ file: UploadFile = File(...),
40
+ ):
41
+ print(user.get('role_id'))
42
+ # if user is None or user.get('role_id') != 1:
43
+ if user is None:
44
+ raise HTTPException(status_code=401, detail='Authentication Failed')
45
+
46
+ try:
47
+ # Query the category based on category_id
48
+ category_query = select(Category.category).where(Category.id == category_id)
49
+ result = db.execute(category_query)
50
+ category = result.scalar_one_or_none()
51
+
52
+ # Check if the category exists
53
+ if category is None:
54
+ raise HTTPException(status_code=404, detail="Category not found")
55
+
56
+ # Construct the reference dictionary
57
+ reference = {
58
+ "title": title,
59
+ "author": author,
60
+ "category": category,
61
+ "year": year,
62
+ "publisher": publisher,
63
+ }
64
+
65
+ except SQLAlchemyError as db_exc:
66
+ # Handle any database-related errors (e.g., connection issues, query issues)
67
+ print(f"Database error: {db_exc}")
68
+ raise HTTPException(status_code=500, detail="Database error occurred")
69
+
70
+ except Exception as e:
71
+ # Catch any other general exceptions
72
+ print(f"Error: {e}")
73
+ raise HTTPException(status_code=500, detail="An error occurred while processing your request") from e
74
+
75
+ try:
76
+ # Assuming you have a Langfuse callback handler
77
+ langfuse_callback_handler = LlamaIndexCallbackHandler()
78
+ langfuse_callback_handler.set_trace_params(
79
+ user_id="admin_book_uploaded",
80
+ )
81
+
82
+ # Process the file and handle data ingestion
83
+ response = await data_ingestion(db_conn, category_id, reference, file)
84
+
85
+ except Exception as e:
86
+ # Handle any errors related to file processing or data ingestion
87
+ print(f"File processing error: {e}")
88
+ raise HTTPException(status_code=500, detail="File processing error")
89
+
90
+ # Return a successful response with the uploaded filename and response from data ingestion
91
+ return {"filename": file.filename, "response": response}
92
+
93
+
94
+ @router.get("/topic")
95
+ async def get_metadata(user: user_dependency):
96
+ if user is None or user.get('role_id') != 1:
97
+ raise HTTPException(status_code=401, detail='Authentication Failed')
98
+ results = await get_data(db_conn)
99
+ return results
100
+
101
+
102
+ @router.put("/topic/{id}")
103
+ async def update_metadata(user: user_dependency, db: db_dependency, id: int, reference: MetadataRequest):
104
+ if user is None or user.get('role_id') != 1:
105
+ raise HTTPException(status_code=401, detail='Authentication Failed')
106
+
107
+ try:
108
+ old_metadata = await get_database.get_data_by_id(id)
109
+
110
+ # Fetch old and new categories
111
+ old_category = (
112
+ db.execute(
113
+ select(Category.category)
114
+ .join(Metadata)
115
+ .where(Metadata.id == id)
116
+ ).scalar_one_or_none()
117
+ )
118
+
119
+ print("old category", old_category)
120
+
121
+ new_category = (
122
+ db.execute(
123
+ select(Category.category)
124
+ .where(Category.id == reference.category_id)
125
+ ).scalar_one_or_none()
126
+ )
127
+
128
+ print("new category", new_category)
129
+
130
+ if old_category is None or new_category is None:
131
+ raise HTTPException(status_code=404, detail="Category not found.")
132
+
133
+ # Prepare the references
134
+ old_reference = {
135
+ "title": old_metadata["title"],
136
+ "author": old_metadata["author"],
137
+ "category": old_category,
138
+ "year": old_metadata["year"],
139
+ "publisher": old_metadata["publisher"],
140
+ }
141
+ print(old_reference)
142
+
143
+ new_reference = {
144
+ "title": reference.title,
145
+ "author": reference.author,
146
+ "category": new_category,
147
+ "year": reference.year,
148
+ "publisher": reference.publisher,
149
+ }
150
+ print(new_reference)
151
+ print("reference : ", reference)
152
+
153
+ # index_manager.update_vector_database(old_reference, new_reference)
154
+
155
+ return await update_data(id, reference, db_conn)
156
+ except Exception as e:
157
+ raise HTTPException(
158
+ status_code=500, detail="An error occurred while updating metadata"
159
+ )
160
+
161
+
162
+ @router.delete("/topic/{id}")
163
+ async def delete_metadata(user: user_dependency, id: int):
164
+ if user is None or user.get('role_id') != 1:
165
+ raise HTTPException(status_code=401, detail='Authentication Failed')
166
+
167
+ try:
168
+ old_reference = await get_database.get_data_by_id(id)
169
+ index_manager.delete_vector_database(old_reference)
170
+
171
+ return await delete_data(id, db_conn)
172
+ # return {"Status":"success"}
173
+
174
+ except Exception as e:
175
+ print(e)
176
+ raise HTTPException(
177
+ status_code=500, detail="An error occurred while delete metadata"
178
+ )
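
For reference, a hedged sketch of how the upload endpoint might be called from a client. The host, bearer token, category_id and file name are placeholders; the request must be multipart form data because the route uses Form(...) and File(...):

    import httpx

    headers = {"Authorization": "Bearer <access_token>"}  # token from /login
    form = {
        "title": "Basic Orthopedics",
        "author": "Jane Doe",
        "category_id": "1",      # must exist in the category table
        "year": "2021",
        "publisher": "Example Press",
    }
    with open("book.pdf", "rb") as f:
        resp = httpx.post(
            "http://localhost:8000/topic",
            headers=headers,
            data=form,
            files={"file": ("book.pdf", f, "application/pdf")},
            timeout=120.0,
        )
    print(resp.status_code, resp.json())
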
api/router/trial.py ADDED
@@ -0,0 +1,20 @@
1
+ from fastapi import APIRouter
2
+
3
+
4
+ router = APIRouter(tags=["Trial"])
5
+
6
+ @router.get("/roles")
7
+ async def get_trial_data():
8
+ pass
9
+
10
+
11
+ @router.post("/roles")
12
+ async def add_trial_data():
13
+ pass
14
+ @router.put("/roles/{id}")
15
+ async def update_trial_data():
16
+ pass
17
+
18
+ @router.delete("/roles/{id}")
19
+ async def remove_trial_data():
20
+ pass
api/router/user.py ADDED
@@ -0,0 +1,115 @@
1
+ from fastapi import APIRouter, Depends, HTTPException, status
2
+ from fastapi.security import OAuth2PasswordRequestForm
3
+ from db.models import User
4
+ from db.database import get_db
5
+ from api.auth import get_current_user, create_access_token
6
+ from service.dto import CreateUserRequest, UserVerification, Token
7
+ from typing import Annotated
8
+ from passlib.context import CryptContext
9
+ from sqlalchemy.orm import Session
10
+ from datetime import timedelta
11
+
12
+
13
+ router = APIRouter(tags=["User"])
14
+
15
+ bcrypt_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
16
+
17
+ db_dependency = Annotated[Session, Depends(get_db)]
18
+ user_dependency = Annotated[dict, Depends(get_current_user)]
19
+
20
+ ACCESS_TOKEN_EXPIRE_MINUTES = 43200
21
+
22
+
23
+ @router.post("/login", response_model=Token)
24
+ async def login_for_access_token(
25
+ login_data: Annotated[OAuth2PasswordRequestForm, Depends()], db: Session = Depends(get_db)
26
+ ):
27
+ user = db.query(User).filter(User.username == login_data.username).first()
28
+
29
+ if not user or not bcrypt_context.verify(login_data.password, user.password_hash):
30
+ raise HTTPException(
31
+ status_code=status.HTTP_401_UNAUTHORIZED,
32
+ detail="Incorrect username or password",
33
+ headers={"WWW-Authenticate": "Bearer"},
34
+ )
35
+
36
+ try:
37
+ access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
38
+ access_token = create_access_token(
39
+ user.username, user.id, user.role_id, access_token_expires, user.email
40
+ )
41
+
42
+ return {"access_token": access_token, "token_type": "bearer"}
43
+
44
+ except Exception as e:
45
+ print(e)
46
+ raise HTTPException(
47
+ status_code=500, detail="An error occuring when login"
48
+ )
49
+
50
+
51
+ @router.get("/login", response_model=dict)
52
+ async def get_user(user: user_dependency):
53
+ return {"name": user.get('username'), "id" : user.get('id'), "email": user.get('email'), "role": user.get('role_id')}
54
+
55
+
56
+ @router.post("/register")
57
+ async def register_user(db: db_dependency, create_user_request: CreateUserRequest):
58
+ existing_user = (
59
+ db.query(User).filter(User.email == create_user_request.email).first()
60
+ )
61
+
62
+ if existing_user:
63
+ raise HTTPException(
64
+ status_code=status.HTTP_400_BAD_REQUEST,
65
+ detail="Email is already registered",
66
+ )
67
+
68
+ try:
69
+ password_hash = bcrypt_context.hash(create_user_request.password)
70
+
71
+ create_user_model = User(
72
+ name=create_user_request.name,
73
+ username=create_user_request.username,
74
+ email=create_user_request.email,
75
+ role_id=create_user_request.role_id,
76
+ password_hash=password_hash,
77
+ )
78
+
79
+ db.add(create_user_model)
80
+ db.commit()
81
+ db.refresh(create_user_model)
82
+
83
+ return {"message": "User created successfully", "user_id": create_user_model.id}
84
+ except Exception as e:
85
+ print(e)
86
+ raise HTTPException(
87
+ status_code=500, detail="An error occuring when register user"
88
+ )
89
+
90
+
91
+
92
+ @router.post("/forgot_password")
93
+ async def forget_password():
94
+ pass
95
+
96
+
97
+ @router.post("/change_password")
98
+ async def change_password(
99
+ user: user_dependency, db: db_dependency, user_verification: UserVerification
100
+ ):
101
+ if user is None:
102
+ raise HTTPException(status_code=401, detail="Authentication Failed")
103
+ user_model = db.query(User).filter(User.id == user.get("id")).first()
104
+
105
+ if not bcrypt_context.verify(
106
+ user_verification.password, user_model.password_hash
107
+ ):
108
+ raise HTTPException(status_code=401, detail="Error on password change")
109
+
110
+ user_model.password_hash = bcrypt_context.hash(user_verification.new_password)
111
+ db.add(user_model)
112
+ db.commit()
113
+ db.refresh(user_model)
114
+
115
+ return {"message": "User's password successfully changed", "user_id": user_model.id}
app.py ADDED
@@ -0,0 +1,40 @@
1
+ from fastapi.applications import FastAPI
2
+ from api.router import health, topic, user, bot, trial, role, reader
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from api.events import register_events
5
+ from utils.utils import pipe
6
+
7
+ def create_instance() -> FastAPI:
8
+ return FastAPI()
9
+
10
+ def add_middleware(app: FastAPI) -> FastAPI:
11
+ app.add_middleware(
12
+ CORSMiddleware,
13
+ allow_origins=["*"],
14
+ allow_credentials=True,
15
+ allow_methods=["*"],
16
+ allow_headers=["*"],
17
+ )
18
+ return app
19
+
20
+ def init_database(app: FastAPI) -> FastAPI:
21
+ return app
22
+
23
+ def register_routers(app: FastAPI) -> FastAPI:
24
+ app.include_router(user.router)
25
+ app.include_router(topic.router)
26
+ app.include_router(bot.router)
27
+ app.include_router(trial.router)
28
+ app.include_router(role.router)
29
+ app.include_router(health.router)
30
+
31
+ return app
32
+
33
+
34
+ def init_app() -> FastAPI:
35
+ app: FastAPI = pipe(
36
+ create_instance(), add_middleware, init_database, register_events, register_routers
37
+ )
38
+ return app
39
+
40
+ app = init_app()
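
utils.utils.pipe is imported above but is not part of this diff; the pipeline only works if it threads each function over the previous result, left to right. A minimal sketch of such a helper (assumed behaviour, not the committed implementation), plus how the app is typically served (uvicorn is pinned in requirements.txt):

    from functools import reduce

    def pipe(value, *functions):
        """Thread `value` through `functions` left to right (assumed behaviour of utils.utils.pipe)."""
        return reduce(lambda acc, fn: fn(acc), functions, value)

    # Typical way to serve the application:
    #   uvicorn app:app --host 0.0.0.0 --port 8000
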
config.py ADDED
@@ -0,0 +1,38 @@
1
+ from pydantic_settings import BaseSettings
2
+ import os
3
+
4
+
5
+ class MysqlConfig(BaseSettings):
6
+ DB_HOST: str = ""
7
+ DB_PORT: str = "10707" # Default MySQL port
8
+ DB_URI: str = ""
9
+ DB_USERNAME: str = ""
10
+ DB_PASSWORD: str = ""
11
+ DB_NAME: str = ""
12
+ DB_URI_SQL_ALCHEMY: str = ""
13
+
14
+ class Config:
15
+ env_file = ".env"
16
+ env_file_encoding = "utf-8"
17
+ extra = "allow" # Allow extra fields
18
+
19
+
20
+ class PineconeConfig(BaseSettings):
21
+ PINECONE_API_KEY: str = ""
22
+
23
+ class Config:
24
+ env_file = ".env"
25
+ env_file_encoding = "utf-8"
26
+ extra = "allow" # Allow extra fields
27
+
28
+ class GPTBotConfig(BaseSettings):
29
+ temperature : float = 0.3
30
+ model : str = "gpt-4o-mini"
31
+ max_tokens : int = 512
32
+ streaming : bool = False
33
+ api_key : str = os.environ.get("OPENAI_API_KEY")
34
+
35
+ # Load configuration
36
+ MYSQL_CONFIG = MysqlConfig()
37
+ PINECONE_CONFIG = PineconeConfig()
38
+ GPTBOT_CONFIG = GPTBotConfig()
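
These settings classes read a local .env file; a sketch of the expected keys follows (all values are placeholders, and the exact connection-string schemes depend on the drivers actually used):

    # .env (placeholders):
    #   DB_HOST=db.example.com
    #   DB_PORT=10707
    #   DB_URI=mysql+aiomysql://user:password@db.example.com:10707/dbname
    #   DB_URI_SQL_ALCHEMY=mysql+pymysql://user:password@db.example.com:10707/dbname
    #   DB_USERNAME=user
    #   DB_PASSWORD=password
    #   DB_NAME=dbname
    #   PINECONE_API_KEY=...
    #   OPENAI_API_KEY=...

    from config import MYSQL_CONFIG, GPTBOT_CONFIG

    print(MYSQL_CONFIG.DB_NAME, GPTBOT_CONFIG.model)
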
core/__init__.py ADDED
File without changes
core/book_enabler/__init__.py ADDED
File without changes
core/chat/__init__.py ADDED
File without changes
core/chat/chatstore.py ADDED
@@ -0,0 +1,101 @@
1
+ import redis
2
+ import os
3
+ import json
4
+ from fastapi import HTTPException
5
+ from uuid import uuid4
6
+ from typing import Optional, List
7
+ from llama_index.storage.chat_store.redis import RedisChatStore
8
+ from llama_index.core.memory import ChatMemoryBuffer
9
+ from service.dto import ChatMessage
10
+
11
+
12
+ class ChatStore:
13
+ def __init__(self):
14
+ self.redis_client = redis.Redis(
15
+ host="redis-10365.c244.us-east-1-2.ec2.redns.redis-cloud.com",
16
+ port=10365,
17
+ password=os.environ.get("REDIS_PASSWORD"),
18
+ )
19
+
20
+ def generate_uuid(self, use_hex=False):
21
+ if use_hex:
22
+ return str(uuid4().hex)
23
+ else:
24
+ return str(uuid4())
25
+
26
+ def initialize_memory_bot(self, type, session_id):
27
+ if type == "general":
28
+ chat_store = RedisChatStore(
29
+ redis_client=self.redis_client,
30
+ ttl=3600 # Time-to-live set for 1 hour
31
+ )
32
+ else:
33
+ chat_store = RedisChatStore(
34
+ redis_client=self.redis_client # Regular chat store without TTL
35
+ )
36
+
37
+ # chat_store = SimpleChatStore()
38
+ # chat_store = RedisChatStore(
39
+ #     redis_client=self.redis_client
40
+ # )  # Need to be configured
41
+
42
+ memory = ChatMemoryBuffer.from_defaults(
43
+ token_limit=3000, chat_store=chat_store, chat_store_key=session_id
44
+ )
45
+
46
+ return memory
47
+
48
+ def get_messages(self, session_id: str) -> List[dict]:
49
+ """Get messages for a session_id."""
50
+ items = self.redis_client.lrange(session_id, 0, -1)
51
+ if len(items) == 0:
52
+ return []
53
+
54
+ # Decode and parse each item into a dictionary
55
+ return [json.loads(m.decode("utf-8")) for m in items]
56
+
57
+ def delete_last_message(self, session_id: str) -> Optional[ChatMessage]:
58
+ """Delete last message for a session_id."""
59
+ return self.redis_client.rpop(session_id)
60
+
61
+ def delete_messages(self, key: str) -> Optional[List[ChatMessage]]:
62
+ """Delete messages for a key."""
63
+ self.redis_client.delete(key)
64
+ return None
65
+
66
+ def clean_message(self, session_id: str) -> Optional[ChatMessage]:
67
+ """Delete specific message for a session_id."""
68
+ current_list = self.redis_client.lrange(session_id, 0, -1)
69
+
70
+ indices_to_delete = []
71
+ for index, item in enumerate(current_list):
72
+ data = json.loads(item) # Parse JSON string to dict
73
+
74
+ # Logic to determine if item should be removed
75
+ if (data.get("role") == "assistant" and data.get("content") is None) or (data.get("role") == "tool"):
76
+ indices_to_delete.append(index)
77
+
78
+ # Remove elements by their indices in reverse order
79
+ for index in reversed(indices_to_delete):
80
+ self.redis_client.lrem(session_id, 1, current_list[index]) # Remove the element from the list in Redis
81
+
82
+ def get_keys(self) -> List[str]:
83
+ """Get all keys."""
84
+ try :
85
+ print(self.redis_client.keys("*"))
86
+ return [key.decode("utf-8") for key in self.redis_client.keys("*")]
87
+
88
+ except Exception as e:
89
+ # Log the error and raise HTTPException for FastAPI
90
+ print(f"An error occurred in update data.: {e}")
91
+ raise HTTPException(
92
+ status_code=400, detail="the error when get keys"
93
+ )
94
+
95
+ def add_message(self, session_id: str, message: ChatMessage) -> None:
96
+ """Add a message for a session_id."""
97
+ item = json.dumps(self._message_to_dict(message))
98
+ self.redis_client.rpush(session_id, item)
99
+
100
+ def _message_to_dict(self, message: ChatMessage) -> dict:
101
+ return message.model_dump()
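
A short usage sketch for ChatStore (assumes REDIS_PASSWORD is set, and that service.dto.ChatMessage, which is not shown in this commit, is a pydantic model with role and content fields):

    from core.chat.chatstore import ChatStore
    from service.dto import ChatMessage  # assumed: pydantic model with `role` and `content`

    store = ChatStore()
    session_id = "demo-session"  # hypothetical key

    store.add_message(session_id, ChatMessage(role="user", content="Hello"))
    print(store.get_messages(session_id))   # [{'role': 'user', 'content': 'Hello'}]
    print(store.get_keys())                 # all session keys currently in Redis
    store.delete_messages(session_id)
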
core/chat/engine.py ADDED
@@ -0,0 +1,107 @@
1
+ from typing import List
2
+ from llama_index.core.vector_stores import (
3
+ MetadataFilter,
4
+ MetadataFilters,
5
+ )
6
+
7
+ from llama_index.core.tools import QueryEngineTool, ToolMetadata
8
+ from llama_index.agent.openai import OpenAIAgent
9
+ from llama_index.llms.openai import OpenAI
10
+ from llama_index.core.query_engine import CitationQueryEngine
11
+ from llama_index.core import Settings
12
+ from core.chat.chatstore import ChatStore
13
+
14
+ from config import GPTBOT_CONFIG
15
+ from core.prompt import SYSTEM_BOT_TEMPLATE, ADDITIONAL_INFORMATIONS
16
+ from core.parser import join_list
17
+
18
+
19
+ class Engine:
20
+ def __init__(self):
21
+ self.llm = OpenAI(
22
+ temperature=GPTBOT_CONFIG.temperature,
23
+ model=GPTBOT_CONFIG.model,
24
+ max_tokens=GPTBOT_CONFIG.max_tokens,
25
+ api_key=GPTBOT_CONFIG.api_key,
26
+ )
27
+
28
+ self.chat_store = ChatStore()
29
+
30
+ Settings.llm = self.llm
31
+
32
+ # def _build_description_bot(self, titles):
33
+ # try:
34
+ # title_description = join_list(titles)
35
+
36
+ # prompt = f"""Generate a concise description (up to 1024 characters) for a bot based on the provided `{title_description}`.
37
+ # The description should accurately reflect the specific medical fields or conditions covered by the titles, following this format:
38
+
39
+ # - If the titles are related to orthopedics, the output should be: *A bot containing information on orthopedics.*
40
+ # - If the titles are related to cancer and asthma, the output should be: *A bot containing information on cancer and asthma.*
41
+ # - If the titles are related to internal medicine, the output should be: *A bot containing information on internal medicine.*
42
+
43
+ # Apply this format to match the medical specialties or diseases referenced in `{title_description}`,
44
+ # ensuring the description highlights the relevant topics.
45
+ # """
46
+
47
+ # description_result = str(self.llm.complete(prompt))
48
+
49
+ # return description_result
50
+
51
+ # except Exception as e:
52
+ # return f"Error generating description: {str(e)}"
53
+
54
+ def get_citation_engine(self, titles: List, index):
55
+ filters = [
56
+ MetadataFilter(
57
+ key="title",
58
+ value=title,
59
+ operator="==",
60
+ )
61
+ for title in titles
62
+ ]
63
+
64
+ filters = MetadataFilters(filters=filters, condition="or")
65
+
66
+ # Create the QueryEngineTool with the index and filters
67
+ kwargs = {"similarity_top_k": 5, "filters": filters}
68
+
69
+ retriever = index.as_retriever(**kwargs)
70
+
71
+ # citation_engine = CitationQueryEngine(retriever=retriever)
72
+
73
+ return CitationQueryEngine.from_args(index, retriever=retriever)
74
+
75
+ def get_chat_engine(self, session_id, index, titles=None, type="general"):
76
+ # Create the QueryEngineTool based on the type
77
+ if type == "general":
78
+ # query_engine = index.as_query_engine(similarity_top_k=3)
79
+ citation_engine = CitationQueryEngine.from_args(index, similarity_top_k=5)
80
+ description = "A book containing information about medicine"
81
+ else:
82
+ citation_engine = self.get_citation_engine(titles, index)
83
+ description = "A book containing information about medicine"
84
+
85
+ metadata = ToolMetadata(name="bot-belajar", description=description)
86
+ print(metadata)
87
+
88
+ vector_query_engine = QueryEngineTool(
89
+ query_engine=citation_engine, metadata=metadata
90
+ )
91
+ print(vector_query_engine)
92
+
93
+ # Initialize the OpenAI agent with the tools
94
+
95
+ if type == "general":
96
+ system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information="")
97
+ else:
98
+ additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
99
+ system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information=additional_information)
100
+ chat_engine = OpenAIAgent.from_tools(
101
+ tools=[vector_query_engine],
102
+ llm=self.llm,
103
+ memory=self.chat_store.initialize_memory_bot(type, session_id),
104
+ system_prompt=system_prompt,
105
+ )
106
+
107
+ return chat_engine
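
A hedged sketch of wiring Engine to an index. IndexManager.load_existing_indexes() is the same call used by core/chat/messaging.py below, and OPENAI_API_KEY plus the Pinecone credentials are assumed to be set:

    from core.chat.engine import Engine
    from script.vector_db import IndexManager

    index = IndexManager().load_existing_indexes()   # same call as in core/chat/messaging.py
    engine = Engine()

    chat_engine = engine.get_chat_engine(session_id="demo-session", index=index)
    print(chat_engine.chat("What are the common causes of anemia?"))
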
core/chat/messaging.py ADDED
@@ -0,0 +1,63 @@
1
+ # Experimental
2
+
3
+ from typing import Dict, Any, Optional, List
4
+ import asyncio
5
+ import logging
6
+ from uuid import uuid4
7
+ from anyio import ClosedResourceError
8
+ from anyio.streams.memory import MemoryObjectSendStream
9
+
10
+ from llama_index.core.callbacks.base import BaseCallbackHandler, CallbackManager
11
+ from llama_index.core.callbacks import CBEventType, EventPayload
12
+ from llama_index.core.query_engine.sub_question_query_engine import (
13
+ SubQuestionAnswerPair,
14
+ )
15
+ from llama_index.core.chat_engine.types import StreamingAgentChatResponse
16
+ from pydantic import BaseModel
17
+
18
+ from core.chat import schema
19
+
20
+ from db.db import MessageSubProcessSourceEnum
21
+ from core.chat.schema import SubProcessMetadataKeysEnum, SubProcessMetadataMap
22
+ from core.chat.engine import Engine
23
+ from script.vector_db import IndexManager
24
+ from service.dto import UserPromptRequest
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ class StreamedMessage(BaseModel):
29
+ content: str
30
+
31
+ async def handle_chat_message(
32
+ user_message: str,
33
+ send_chan: MemoryObjectSendStream,
34
+ ) -> None:
35
+ async with send_chan:
36
+ engine = Engine()
37
+
38
+ index_manager = IndexManager()
39
+ index = index_manager.load_existing_indexes()
40
+
41
+ # Retrieve the chat engine with the loaded index
42
+ chat_engine = await engine.get_chat_engine(index)
43
+
44
+ logger.debug("Engine received")
45
+ streaming_chat_response: StreamingAgentChatResponse = (
46
+ await chat_engine.astream_chat(user_message)
47
+ )
48
+ response_str = ""
49
+ async for text in streaming_chat_response.async_response_gen():
50
+ response_str += text
51
+ if send_chan._closed:
52
+ logger.debug(
53
+ "Received streamed token after send channel closed. Ignoring."
54
+ )
55
+ return
56
+ await send_chan.send(StreamedMessage(content=response_str))
57
+
58
+ if response_str.strip() == "":
59
+ await send_chan.send(
60
+ StreamedMessage(
61
+ content="Sorry, I either wasn't able to understand your question or I don't have an answer for it."
62
+ )
63
+ )
core/chat/schema.py ADDED
@@ -0,0 +1,162 @@
1
+ # Experimental
2
+
3
+ from pydantic import BaseModel, Field, field_validator
4
+ from typing import List, Optional, Dict, Union, Any
5
+ from enum import Enum
6
+ from uuid import UUID
7
+ from datetime import datetime
8
+ from llama_index.core.schema import BaseNode, NodeWithScore
9
+ from llama_index.core.callbacks.schema import EventPayload
10
+ from llama_index.core.query_engine.sub_question_query_engine import SubQuestionAnswerPair
11
+ from db.db import (
12
+ MessageRoleEnum,
13
+ MessageStatusEnum,
14
+ MessageSubProcessSourceEnum,
15
+ MessageSubProcessStatusEnum,
16
+ )
17
+
18
+ DB_DOC_ID_KEY = "db_document_id"
19
+
20
+ class Base(BaseModel):
21
+ id: Optional[UUID] = Field(None, description="Unique identifier")
22
+ created_at: Optional[datetime] = Field(None, description="Creation datetime")
23
+ updated_at: Optional[datetime] = Field(None, description="Update datetime")
24
+
25
+ class Config:
26
+ orm_mode = True
27
+
28
+ class BaseMetadataObject(BaseModel):
29
+ class Config:
30
+ orm_mode = True
31
+
32
+ class Citation(BaseMetadataObject):
33
+ document_id: UUID
34
+ text: str
35
+ page_number: int
36
+ score: Optional[float]
37
+
38
+ @field_validator("document_id")
39
+ def validate_document_id(cls, value):
40
+ if value:
41
+ return str(value)
42
+ return value
43
+
44
+ @classmethod
45
+ def from_node(cls, node_w_score: NodeWithScore) -> "Citation":
46
+ node: BaseNode = node_w_score.node
47
+ page_number = int(node.source_node.metadata["page_label"])
48
+ document_id = node.source_node.metadata[""]
49
+ return cls(
50
+ document_id=document_id,
51
+ text=node.get_content(),
52
+ page_number=page_number,
53
+ score=node_w_score.score,
54
+ )
55
+
56
+
57
+ class QuestionAnswerPair(BaseMetadataObject):
58
+ """
59
+ A question-answer pair that is used to store the sub-questions and answers
60
+ """
61
+
62
+ question: str
63
+ answer: Optional[str]
64
+ citations: Optional[List[Citation]] = None
65
+
66
+ @classmethod
67
+ def from_sub_question_answer_pair(
68
+ cls, sub_question_answer_pair: SubQuestionAnswerPair
69
+ ):
70
+ if sub_question_answer_pair.sources is None:
71
+ citations = None
72
+ else:
73
+ citations = [
74
+ Citation.from_node(node_w_score)
75
+ for node_w_score in sub_question_answer_pair.sources
76
+ if node_w_score.node.source_node is not None
77
+ and DB_DOC_ID_KEY in node_w_score.node.source_node.metadata
78
+ ]
79
+ citations = citations or None
80
+ return cls(
81
+ question=sub_question_answer_pair.sub_q.sub_question,
82
+ answer=sub_question_answer_pair.answer,
83
+ citations=citations,
84
+ )
85
+
86
+
87
+ # later will be Union[QuestionAnswerPair, more to add later... ]
88
+ class SubProcessMetadataKeysEnum(str, Enum):
89
+ SUB_QUESTION = EventPayload.SUB_QUESTION.value
90
+
91
+
92
+ # keeping the typing pretty loose here, in case there are changes to the metadata data formats.
93
+ SubProcessMetadataMap = Dict[Union[SubProcessMetadataKeysEnum, str], Any]
94
+
95
+
96
+ class MessageSubProcess(Base):
97
+ message_id: UUID
98
+ source: MessageSubProcessSourceEnum
99
+ status: MessageSubProcessStatusEnum
100
+ metadata_map: Optional[SubProcessMetadataMap]
101
+
102
+
103
+ class Message(Base):
104
+ conversation_id: UUID
105
+ content: str
106
+ role: MessageRoleEnum
107
+ status: MessageStatusEnum
108
+ sub_processes: List[MessageSubProcess]
109
+
110
+
111
+ class UserMessageCreate(BaseModel):
112
+ content: str
113
+
114
+ class DocumentMetadataKeysEnum(str, Enum):
115
+ """
116
+ Enum for the keys of the metadata map for a document
117
+ """
118
+
119
+ SEC_DOCUMENT = "sec_document"
120
+
121
+
122
+ class SecDocumentTypeEnum(str, Enum):
123
+ """
124
+ Enum for the type of sec document
125
+ """
126
+
127
+ TEN_K = "10-K"
128
+ TEN_Q = "10-Q"
129
+
130
+
131
+ class SecDocumentMetadata(BaseModel):
132
+ """
133
+ Metadata for a document that is a sec document
134
+ """
135
+
136
+ company_name: str
137
+ company_ticker: str
138
+ doc_type: SecDocumentTypeEnum
139
+ year: int
140
+ quarter: Optional[int]
141
+ accession_number: Optional[str]
142
+ cik: Optional[str]
143
+ period_of_report_date: Optional[datetime]
144
+ filed_as_of_date: Optional[datetime]
145
+ date_as_of_change: Optional[datetime]
146
+
147
+
148
+ DocumentMetadataMap = Dict[Union[DocumentMetadataKeysEnum, str], Any]
149
+
150
+
151
+ class Document(Base):
152
+ url: str
153
+ metadata_map: Optional[DocumentMetadataMap] = None
154
+
155
+
156
+ class Conversation(Base):
157
+ messages: List[Message]
158
+ documents: List[Document]
159
+
160
+
161
+ class ConversationCreate(BaseModel):
162
+ document_ids: List[UUID]
core/parser.py ADDED
@@ -0,0 +1,102 @@
1
+ import re
2
+
3
+
4
+ def parse_topics_to_dict(text):
5
+ topics = {}
6
+ lines = text.strip().split("\n")
7
+ current_topic = None
8
+
9
+ topic_pattern = re.compile(r"^\d+\.\s+(.*)$")
10
+ sub_topic_pattern = re.compile(r"^\*\s+(.*)$")
11
+
12
+ for line in lines:
13
+ line = line.strip()
14
+ if topic_pattern.match(line):
15
+ current_topic = topic_pattern.match(line).group(1)
16
+ topics[current_topic] = []
17
+ elif sub_topic_pattern.match(line):
18
+ sub_topic = sub_topic_pattern.match(line).group(1)
19
+ if current_topic:
20
+ topics[current_topic].append(sub_topic)
21
+
22
+ print(topics)
23
+ return topics
24
+
25
+
26
+ def remove_all_sources(text):
27
+ # Construct a regular expression pattern to match all sources
28
+ pattern = r"Source \d+:(.*?)(?=Source \d+:|$)"
29
+
30
+ # Use re.DOTALL to make '.' match newlines and re.IGNORECASE for case-insensitive matching
31
+ updated_text = re.sub(pattern, "", text, flags=re.DOTALL)
32
+
33
+ return updated_text.strip()
34
+
35
+
36
+ def clean_text(text):
37
+ # Replace multiple spaces with a single space
38
+ text = re.sub(r"\s{2,}", " ", text)
39
+ # Remove newline characters that are not followed by a number (to keep lists or numbered points)
40
+ text = re.sub(r"\n(?!\s*\d)", " ", text)
41
+ # Remove unnecessary punctuation (optional, adjust as needed)
42
+ text = re.sub(r";(?=\S)", "", text)
43
+ # Optional: Remove extra spaces around certain characters
44
+ text = re.sub(r"\s*([,;])\s*", r"\1 ", text)
45
+ # Normalize whitespace to a single space
46
+ text = re.sub(r"\s+", " ", text).strip()
47
+
48
+ return text
49
+
50
+
51
+ def update_response(text):
52
+ # Find all the references in the text, e.g., [1], [3], [5]
53
+ responses = re.findall(r"\[\d+\]", text)
54
+
55
+ # Extract the numbers from the responses, and remove duplicates
56
+ ref_numbers = sorted(set(int(respon.strip("[]")) for respon in responses))
57
+
58
+ # Create a mapping from old reference numbers to new ones
59
+ ref_mapping = {old: new for new, old in enumerate(ref_numbers, start=1)}
60
+
61
+ # Replace old responses with the updated responses in the text
62
+ for old, new in ref_mapping.items():
63
+ text = re.sub(rf"\[{old}\]", f"[{new}]", text)
64
+
65
+ return text
66
+
67
+
68
+ def renumber_sources(source_list):
69
+ new_sources = []
70
+ for i, source in enumerate(source_list):
71
+ # Extract the content after the colon
72
+ content = source.split(": ", 1)[1]
73
+ # Add the new source number and content
74
+ new_sources.append(f"source {i+1}: {content}")
75
+ return new_sources
76
+
77
+
78
+ def seperate_to_list(text):
79
+ # Step 1: Split the text by line breaks (\n)
80
+ lines = text.split("\n")
81
+
82
+ # Step 2: Remove occurrences of "source (number):"
83
+ cleaned_lines = [re.sub(r"Source \d+\:", "", line) for line in lines]
84
+
85
+ # Step 3: Split all capital sentences
86
+ final_output = []
87
+ for line in cleaned_lines:
88
+ # Split any fully capitalized sentence (surrounding non-uppercase text remains intact)
89
+ split_line = re.split(r"([A-Z\s]+[.!?])", line)
90
+ final_output.extend([part.strip() for part in split_line if part.strip()])
91
+
92
+ return final_output
93
+
94
+ def join_list(items):
95
+ if not items:
96
+ return ""
97
+ elif len(items) == 1:
98
+ return items[0]
99
+ elif len(items) == 2:
100
+ return f"{items[0]} and {items[1]}"
101
+ else:
102
+ return ", ".join(items[:-1]) + " and " + items[-1]
core/prompt.py ADDED
@@ -0,0 +1,126 @@
1
+ SYSTEM_BOT_TEMPLATE = """
2
+ Kamu adalah Medbot yang selalu menggunakan tools kamu untuk menjawab pertanyaan tentang kedokteran. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. {additional_information} Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi dari apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli.
3
+
4
+ **Instruksi**:
5
+
6
+ 1. **Jawaban Berdasarkan Tools**: Jika pengguna bertanya tentang topik kedokteran, gunakanlah tools yang tersedia untuk memberikan jawaban. Pastikan jawabanmu relevan dan sesuai dengan informasi dari tools tersebut.
7
+
8
+ 2. **Referensi dan Kutipan**: Jangan menghapus sumber kutipan dari teks yang diberikan. Contohnya, jika teksnya adalah "Ilmu kedokteran sangat dibutuhkan [2]", pastikan untuk menyertakan kutipan sumbernya yaitu [2] dalam jawabanmu.
9
+
10
+ 3. **Ketika Tidak Tahu Jawaban**: Jika pertanyaan pengguna tidak dapat dijawab dengan menggunakan tools ini, sampaikan dengan sopan bahwa kamu tidak memiliki jawaban untuk pertanyaan tersebut. Arahkan pengguna untuk mencari informasi lebih lanjut atau bertanya pada ahli di bidang kedokteran.
11
+
12
+ 4. **Gaya Jawaban**: Berikan jawaban dengan gaya yang ramah dan profesional. Hindari penggunaan poin-poin, dan sampaikan informasi secara naratif agar lebih mudah dipahami. Gunakan kata 'dok' atau 'dokter' untuk merujuk pada dokter, dan hindari kesan monoton dengan menambahkan emotikon jika sesuai.
13
+
14
+ 5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊"
15
+ """
16
+
17
+ ADDITIONAL_INFORMATIONS = """
18
+ Kemudian, kamu menjawab pertanyaan user dari buku {titles}, jadi jika user bertanya, pastikan kamu mengacu pada buku tersebut yang didapatkan dari tools yang kamu punya.
19
+ """
20
+
21
+ SYSTEM_TOPIC_TEMPLATE = """
22
+ You are tasked with analyzing a table of contents from a book. Your goal is to identify and extract the main topics and subtopics. Please provide a clear and organized list of these topics and subtopics. The list should reflect the structure and hierarchy presented in the table of contents.
23
+ """
24
+
25
+ USER_TOPIC_TEMPLATE = """
26
+
27
+ **Task:** Analyze the table of contents of a book to identify the main topics and relevant subtopics.
28
+
29
+ **Instructions:**
30
+
31
+ 1. **Main Topics:** Identify the main topics from the table of contents, excluding sections like background, preface, introduction, and references.
32
+ 2. **Subtopics:** For each main topic, list the related subtopics
33
+
34
+ **Output Format:**
35
+
36
+ 1. **Main Topic 1**
37
+ * Subtopic 1
38
+ * Subtopic 2
39
+ * etc.
40
+
41
+ 2. **Main Topic 2**
42
+ * Subtopic 1
43
+ * Subtopic 2
44
+ * etc.
45
+
46
+ **Important Guidelines:**
47
+
48
+ - Include only relevant main topics and subtopics.
49
+ - Ensure the order of topics and subtopics matches the order displayed in the table of contents.
50
+ - Use the correct format and do not include additional information beyond the main topics and subtopics.
51
+ """
52
+
53
+ REFINED_GET_TOPIC_TEMPLATE = """
54
+ Ensure the following topic and subtopic are provided:
55
+
56
+ {topics}
57
+
58
+ Follow this format :
59
+
60
+ 1. **Main topic 1**
61
+ * Subtopic 1
62
+ * Subtopic 2
63
+ * etc
64
+
65
+ 2. **Main topic 2**
66
+ * Subtopic 1
67
+ * Subtopic 2
68
+ * etc
69
+
70
+ etc
71
+
72
+ Do not add any additional text; only use the specified format.
73
+ """
74
+
75
+ ADD_METADATA_TEMPLATE = """
76
+ **Context for Adding Metadata**
77
+
78
+ {context_str}
79
+
80
+ **Context Structure:**
81
+
82
+ 1. **Main Topic 1**
83
+ * Subtopic 1
84
+ * Subtopic 2
85
+ * etc
86
+
87
+ 2. **Main Topic 2**
88
+ * Subtopic 1
89
+ * Subtopic 2
90
+ * etc
91
+
92
+ **Given:**
93
+ - **Topic and Subtopic:** {{extractor_output}}
94
+
95
+ **Role:**
96
+ Your task is to extract and organize metadata for the {class_name}. Follow the instructions below:
97
+
98
+ **Instructions:**
99
+
100
+ 1. **Extract the Main Topic:**
101
+ - **Goal:** Identify the overarching theme or subject from the provided topic and subtopic.
102
+ - **How:** Look for a theme broad enough to encompass the document's primary focus while remaining specific enough to reflect its core purpose.
103
+ - **Tip:** Ensure the main topic is concise yet descriptive, providing a clear understanding of the document’s primary theme. If the content is general or introductory (e.g., background, preface, introduction, references), categorize it accordingly.
104
+
105
+ 2. **Extract the Key Subtopic (if applicable):**
106
+ - **Goal:** Determine the most relevant supporting element related to the main topic.
107
+ - **How:** Identify a sub-element or detail that provides additional depth or clarification to the main topic.
108
+ - **Tip:** Ensure the subtopic directly supports or elaborates on the main topic.
109
+
110
+ 3. **Handle Cases Without a Clear Subtopic:**
111
+ - **Goal:** If no distinct subtopic is present, set the subtopic to mirror the main topic.
112
+ - **How:** In such cases, consider the main topic comprehensive enough to stand alone without additional subtopics.
113
+
114
+ 4. **Record the Extracted Data:**
115
+ - **Goal:** Organize and record the extracted topic and subtopic within the {class_name} class.
116
+ - **How:** Structure the entries clearly and precisely as attributes of the class.
117
+ - **Tip:** Use precise language to capture the relationship between the main topic and subtopic, ensuring clarity and ease of reference for future use.
118
+ """
119
+
120
+ SUMMARIZER_SYSTEM_TEMPLATE = """
121
+
122
+ """
123
+
124
+ SUMMARIER_HUMAN_TEMPLATE = """
125
+
126
+ """
core/tools.py ADDED
File without changes
db/__init__.py ADDED
File without changes
db/database.py ADDED
@@ -0,0 +1,76 @@
1
+ from sqlalchemy import create_engine
2
+ from sqlalchemy.orm import sessionmaker
3
+ from sqlalchemy.exc import OperationalError
4
+ from config import MYSQL_CONFIG
5
+ from fastapi import HTTPException
6
+ import base64
7
+ import os
8
+
9
+ SQLALCHEMY_DATABASE_URL = MYSQL_CONFIG.DB_URI_SQL_ALCHEMY
10
+
11
+ # Get the base64 encoded certificate from the environment variable
12
+ ca_cert_base64 = os.getenv('CA_CERT_BASE64')
13
+
14
+ # Decode the base64 content
15
+ if ca_cert_base64:
16
+ ca_cert_content = base64.b64decode(ca_cert_base64).decode('utf-8')
17
+
18
+ # Write the decoded content to a temporary .pem file
19
+ with open('/tmp/ca.pem', 'w') as f:
20
+ f.write(ca_cert_content)
21
+
22
+ ca_cert_path = '/tmp/ca.pem'
23
+ else:
24
+ raise ValueError("CA_CERT_BASE64 environment variable is not set")
25
+
26
+ # Use the decoded CA certificate in the SQLAlchemy engine
27
+
28
+
29
+ engine = create_engine(
30
+ SQLALCHEMY_DATABASE_URL,
31
+ connect_args={
32
+ "ssl": {
33
+ "sslmode": "REQUIRED",
34
+ "ca": ca_cert_path, # Path to the temporary CA certificate
35
+ # Add other SSL options as needed
36
+ }
37
+ },
38
+ )
39
+
40
+ # engine = create_engine(
41
+ # SQLALCHEMY_DATABASE_URL,
42
+ # connect_args={
43
+ # "ssl": {
44
+ # "sslmode": "REQUIRED",
45
+ # "ca": "ca.pem", # Update this path to your CA certificate
46
+ # # Other SSL options can be added here
47
+ # }
48
+ # },
49
+ # )
50
+
51
+ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
52
+
53
+
54
+ def get_db():
55
+ db = SessionLocal()
56
+ try:
57
+ yield db
58
+ except OperationalError as e:
59
+ # Log the error and raise HTTPException for FastAPI
60
+ print(f"An error occurred in get database sql alchemy.: {e}")
61
+ raise HTTPException(
62
+ status_code=400, detail="Database connection error"
63
+ )
64
+ # Check if it's an authentication-related error
65
+ except Exception as e:
66
+ # Check if it's an authentication-related error
67
+ if "401" in str(e):
68
+ raise HTTPException(
69
+ status_code=401, detail="Authentication failed"
70
+ )
71
+ else:
72
+ # For any other type of exception, raise a generic 400 error
73
+ print(f"An error occurred: {e}")
74
+ raise HTTPException(
75
+ status_code=400, detail="An unexpected error occurred"
76
+ )
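
CA_CERT_BASE64 must hold the base64-encoded PEM; one way to produce the value (a sketch, adjust the path to the actual certificate):

    import base64

    with open("ca.pem", "rb") as f:
        print(base64.b64encode(f.read()).decode("ascii"))
    # Put the printed value into the CA_CERT_BASE64 environment variable.
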
db/db.py ADDED
@@ -0,0 +1,124 @@
1
+ # Experimental
2
+
3
+ from sqlalchemy import Column, String, ForeignKey, DateTime
4
+ from sqlalchemy.dialects.postgresql import UUID, ENUM, JSONB
5
+ from sqlalchemy.orm import relationship
6
+ from sqlalchemy.sql import func
7
+ from enum import Enum
8
+ from sqlalchemy.ext.declarative import as_declarative, declared_attr
9
+ from llama_index.core.callbacks.schema import CBEventType
10
+
11
+
12
+ # Model
13
+ @as_declarative()
14
+ class Base:
15
+ id = Column(UUID, primary_key=True, index=True, default=func.uuid_generate_v4())
16
+ created_at = Column(DateTime, server_default=func.now(), nullable=False)
17
+ updated_at = Column(
18
+ DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
19
+ )
20
+
21
+ __name__: str
22
+
23
+ # Generate __tablename__ automatically
24
+ @declared_attr
25
+ def __tablename__(cls) -> str:
26
+ return cls.__name__.lower()
27
+
28
+ # DB
29
+ class MessageRoleEnum(str, Enum):
30
+ user = "user"
31
+ assistant = "assistant"
32
+
33
+
34
+ class MessageStatusEnum(str, Enum):
35
+ PENDING = "PENDING"
36
+ SUCCESS = "SUCCESS"
37
+ ERROR = "ERROR"
38
+
39
+
40
+ class MessageSubProcessStatusEnum(str, Enum):
41
+ PENDING = "PENDING"
42
+ FINISHED = "FINISHED"
43
+
44
+
45
+ # python doesn't allow enums to be extended, so we have to do this
46
+ additional_message_subprocess_fields = {
47
+ "CONSTRUCTED_QUERY_ENGINE": "constructed_query_engine",
48
+ "SUB_QUESTIONS": "sub_questions",
49
+ }
50
+ MessageSubProcessSourceEnum = Enum(
51
+ "MessageSubProcessSourceEnum",
52
+ [(event_type.name, event_type.value) for event_type in CBEventType]
53
+ + list(additional_message_subprocess_fields.items()),
54
+ )
55
+
56
+
57
+ def to_pg_enum(enum_class) -> ENUM:
58
+ return ENUM(enum_class, name=enum_class.__name__)
59
+
60
+
61
+ class Document(Base):
62
+ """
63
+ A document along with its metadata
64
+ """
65
+
66
+ # URL to the actual document (e.g. a PDF)
67
+ url = Column(String, nullable=False, unique=True)
68
+ metadata_map = Column(JSONB, nullable=True)
69
+ conversations = relationship("ConversationDocument", back_populates="document")
70
+
71
+
72
+ class Conversation(Base):
73
+ """
74
+ A conversation with messages and linked documents
75
+ """
76
+
77
+ messages = relationship("Message", back_populates="conversation")
78
+ conversation_documents = relationship(
79
+ "ConversationDocument", back_populates="conversation"
80
+ )
81
+
82
+
83
+ class ConversationDocument(Base):
84
+ """
85
+ A many-to-many relationship between a conversation and a document
86
+ """
87
+
88
+ conversation_id = Column(
89
+ UUID(as_uuid=True), ForeignKey("conversation.id"), index=True
90
+ )
91
+ document_id = Column(UUID(as_uuid=True), ForeignKey("document.id"), index=True)
92
+ conversation = relationship("Conversation", back_populates="conversation_documents")
93
+ document = relationship("Document", back_populates="conversations")
94
+
95
+
96
+ class Message(Base):
97
+ """
98
+ A message in a conversation
99
+ """
100
+
101
+ conversation_id = Column(
102
+ UUID(as_uuid=True), ForeignKey("conversation.id"), index=True
103
+ )
104
+ content = Column(String)
105
+ role = Column(to_pg_enum(MessageRoleEnum))
106
+ status = Column(to_pg_enum(MessageStatusEnum), default=MessageStatusEnum.PENDING)
107
+ conversation = relationship("Conversation", back_populates="messages")
108
+ sub_processes = relationship("MessageSubProcess", back_populates="message")
109
+
110
+
111
+ class MessageSubProcess(Base):
112
+ """
113
+ A record of a sub-process that occurred as part of the generation of a message from an AI assistant
114
+ """
115
+
116
+ message_id = Column(UUID(as_uuid=True), ForeignKey("message.id"), index=True)
117
+ source = Column(to_pg_enum(MessageSubProcessSourceEnum))
118
+ message = relationship("Message", back_populates="sub_processes")
119
+ status = Column(
120
+ to_pg_enum(MessageSubProcessStatusEnum),
121
+ default=MessageSubProcessStatusEnum.FINISHED,
122
+ nullable=False,
123
+ )
124
+ metadata_map = Column(JSONB, nullable=True)
db/delete_data.py ADDED
@@ -0,0 +1,22 @@
1
+ import logging
2
+ from db.repository import Repository, get_db_conn
3
+
4
+ # Setup logging (configure as needed)
5
+ logging.basicConfig(level=logging.INFO)
6
+
7
+
8
+ class DeleteDatabase(Repository):
9
+ async def delete_record(self, params):
10
+ if "id" not in params:
11
+ raise ValueError("The 'id' parameter is required.")
12
+ query = """
13
+ DELETE FROM metadata
14
+ WHERE id = :id
15
+ """
16
+
17
+ try:
18
+ await self._exec(query, params)
19
+ logging.info(f"Record with id {params['id']} deleted successfully.")
20
+ except Exception as e:
21
+ logging.error(f"Error deleting record with id {params['id']}: {e}")
22
+ raise
db/get_data.py ADDED
@@ -0,0 +1,67 @@
1
+ import logging
2
+ from db.repository import Repository, get_db_conn
3
+
4
+ # Setup logging (configure as needed)
5
+ logging.basicConfig(level=logging.INFO)
6
+
7
+
8
+ class GetDatabase(Repository):
9
+ async def execute_query(self, query, params=None, fetch_one=False):
10
+ """
11
+
12
+ Helper function to execute SQL queries and handle exceptions.
13
+ """
14
+ try:
15
+ print(fetch_one)
16
+ if fetch_one:
17
+
18
+ results = await self._fetch_one(query, params)
19
+ print(results)
20
+ else:
21
+ results = await self.get_by_query(query, params)
22
+ print("result execute query : ", results)
23
+ return results if results else None
24
+ except Exception as e:
25
+ logging.error(f"An error occurred while executing query: {e}")
26
+ return None
27
+
28
+ async def get_data(self, title):
29
+ """
30
+ Fetch the first result matching the given title from the metadata table.
31
+ """
32
+ query = """
33
+ SELECT * FROM metadata
34
+ WHERE title = :title
35
+ LIMIT 5;
36
+ """
37
+
38
+ try:
39
+ results = await self.execute_query(query, {"title": title}, fetch_one=True)
40
+ return results
41
+ except Exception as e:
42
+ logging.error(f"An error occurred while get data: {e}")
43
+ return None
44
+
45
+ async def get_all_data(self):
46
+ """
47
+ Fetch all data from the metadata table.
48
+ """
49
+ query = """
50
+ SELECT * FROM metadata
51
+ """
52
+ results = await self.execute_query(query)
53
+ return results
54
+
55
+ async def get_data_by_id(self, id):
56
+ query = f"""
57
+ SELECT * FROM metadata WHERE id = :id
58
+ """
59
+
60
+ param = {"id" : id}
61
+ try:
62
+ results = await self.execute_query(query, param)
63
+ print(f"Query successful, results: {results}")
64
+ return results[0] if results else None
65
+ except Exception as e:
66
+ print(f"Error fetching data by ID {id}: {e}")
67
+ return None
db/models.py ADDED
@@ -0,0 +1,113 @@
1
+ from typing import List, Literal
2
+ from typing_extensions import Annotated
3
+ from typing import Optional
4
+ import uuid
5
+ import datetime
6
+ import sqlalchemy
7
+ from sqlalchemy.orm import mapped_column, relationship
8
+ from sqlalchemy import Integer, String, ForeignKey, UUID,func, Column, CHAR
9
+ from sqlalchemy.orm import DeclarativeBase, Mapped
10
+
11
+ timestamp_current = Annotated[
12
+ datetime.datetime,
13
+ mapped_column(nullable=False, server_default=func.CURRENT_TIMESTAMP()),
14
+ ]
15
+
16
+ timestamp_update = Annotated[
17
+ datetime.datetime,
18
+ mapped_column(nullable=False, server_default=func.CURRENT_TIMESTAMP(), onupdate=func.CURRENT_TIMESTAMP()),
19
+ ]
20
+
21
+ message_role = Literal["user", "assistant"]
22
+
23
+ class Base(DeclarativeBase):
24
+ type_annotation_map = {
25
+ message_role: sqlalchemy.Enum("user", "assistant", name="message_role"),
26
+ }
27
+
28
+ class User(Base):
29
+ __tablename__ = "user"
30
+
31
+ id = mapped_column(Integer, primary_key=True)
32
+ name = mapped_column(String(100), nullable=False)
33
+ username = mapped_column(String(100), unique=True, nullable=False)
34
+ role_id = mapped_column(Integer, ForeignKey("role.id"))
35
+ email = mapped_column(String(100), unique=True, nullable=False)
36
+ password_hash = mapped_column(String(100), nullable=False)
37
+ created_at: Mapped[timestamp_current]
38
+ updated_at : Mapped[timestamp_update]
39
+
40
+ class Feedback(Base):
41
+ __tablename__ = "feedback"
42
+
43
+ id = mapped_column(Integer, primary_key=True)
44
+ user_id = mapped_column(Integer, ForeignKey("user.id"))
45
+ rating = mapped_column(Integer)
46
+ comment = mapped_column(String(1000))
47
+ created_at : Mapped[timestamp_current]
48
+
49
+ class Role(Base):
50
+ __tablename__ = "role"
51
+
52
+ id = mapped_column(Integer, primary_key=True)
53
+ role_name = mapped_column(String(200), nullable=False)
54
+ description = mapped_column(String(200))
55
+
56
+ class User_Role(Base):
57
+ __tablename__ = "user_role"
58
+
59
+ id = mapped_column(Integer, primary_key=True)
60
+ user_id = mapped_column(Integer, ForeignKey("user.id"))
61
+ role_id = mapped_column(Integer, ForeignKey("role.id"))
62
+
63
+ class Bot(Base):
64
+ __tablename__ = "bot"
65
+
66
+ id = mapped_column(Integer, primary_key=True)
67
+ user_id = mapped_column(Integer, ForeignKey("user.id"))
68
+ bot_name = mapped_column(String(200), nullable=False)
69
+ created_at : Mapped[timestamp_current]
70
+
71
+ class Session(Base):
72
+ __tablename__ = "session"
73
+
74
+ id = mapped_column(String(36), primary_key=True, index=True, default=lambda: str(uuid.uuid4())) # Store as string
75
+ user_id = mapped_column(Integer, ForeignKey("user.id"))
76
+ bot_id = mapped_column(Integer, ForeignKey("bot.id"))
77
+ created_at : Mapped[timestamp_current]
78
+
79
+ class Message(Base):
80
+ __tablename__ = "message"
81
+
82
+ id = mapped_column(Integer, primary_key=True)
83
+ session_id = mapped_column(String(36), ForeignKey("session.id"), nullable=False) # Store as string
84
+ role : Mapped[message_role]
85
+ goal = mapped_column(String(200))
86
+ created_at : Mapped[timestamp_current]
87
+
88
+ class Category(Base):
89
+ __tablename__ = "category"
90
+
91
+ id = mapped_column(Integer, primary_key=True)
92
+ category = mapped_column(String(200))
93
+ created_at : Mapped[timestamp_current]
94
+
95
+ class Metadata(Base):
96
+ __tablename__ = "metadata"
97
+
98
+ id = mapped_column(Integer, primary_key=True)
99
+ title = mapped_column(String(200))
100
+ category_id = mapped_column(Integer, ForeignKey("category.id"))
101
+ author = mapped_column(String(200))
102
+ year = mapped_column(Integer)
103
+ publisher = mapped_column(String(100))
104
+ created_at : Mapped[timestamp_current]
105
+ updated_at : Mapped[timestamp_update]
106
+
107
+
108
+ class Bot_Meta(Base):
109
+ __tablename__ = "bot_meta"
110
+
111
+ id = mapped_column(Integer, primary_key=True)
112
+ bot_id = mapped_column(Integer, ForeignKey("bot.id"))
113
+ metadata_id = mapped_column(Integer, ForeignKey("metadata.id"))
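
No migration tool appears in this commit, so the tables presumably have to exist already; if they need to be created directly from these models, a sketch (requires the same environment as db/database.py):

    from db.database import engine
    from db.models import Base

    Base.metadata.create_all(bind=engine)  # creates user, role, bot, session, message, category, metadata, ...
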
db/repository.py ADDED
@@ -0,0 +1,36 @@
1
+ from databases import Database
2
+ import datetime
3
+
4
+
5
+ def get_db_conn(config):
6
+ db_url = f"{config.DB_URI}"
7
+ return Database(db_url)
8
+
9
+
10
+ class Repository:
11
+ def __init__(self, db_conn):
12
+ self.db_conn = db_conn
13
+
14
+ async def get_by_query(self, query, param):
15
+ results = await self.db_conn.fetch_all(query, param)
16
+ print("result get _by query", results)
17
+ return [dict(result) for result in results]
18
+
19
+ async def _fetch_one(self, query, param):
20
+ result = await self.db_conn.fetch_one(query, param)
21
+ return dict(result) if result is not None else result
22
+
23
+ async def _exec(self, query, param):
24
+ return await self.db_conn.execute(query, param)
25
+
26
+ async def _exec_many(self, query, params):
27
+ return await self.db_conn.execute_many(query, params)
28
+
29
+ def update_params(self, params, update=False):
30
+ current_time = datetime.datetime.now()
31
+ if not update:
32
+
33
+ params.update({"createdAt": current_time, "updatedAt": current_time})
34
+ else:
35
+ params.update({"updatedAt": current_time})
36
+ return params
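
A sketch of using the repository layer outside FastAPI (the `databases` connection is opened explicitly here; inside the app that is presumably handled by api.events.register_events, which is not part of this diff):

    import asyncio

    from config import MYSQL_CONFIG
    from db.repository import get_db_conn
    from db.get_data import GetDatabase

    async def main():
        db_conn = get_db_conn(MYSQL_CONFIG)
        await db_conn.connect()
        rows = await GetDatabase(db_conn).get_all_data()
        print(rows)
        await db_conn.disconnect()

    asyncio.run(main())
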
db/save_data.py ADDED
@@ -0,0 +1,39 @@
1
+ from databases import Database
2
+ import logging
3
+ from dotenv import load_dotenv
4
+ from db.repository import Repository
5
+
6
+
7
+ load_dotenv()
8
+
9
+
10
+ class InsertDatabase(Repository):
11
+
12
+ # Example function to insert data asynchronously
13
+ async def insert_data(self, params, category_id):
14
+ # SQL insert query with named placeholders
15
+ query = """
16
+ INSERT INTO metadata (title, category_id, author, year, publisher)
17
+ VALUES (:title, :category_id, :author, :year, :publisher)
18
+ """
19
+
20
+ reference = {
21
+ "title": params["title"],
22
+ "category_id": category_id, # directly assign category_id
23
+ "author": params["author"],
24
+ "year": params["year"],
25
+ "publisher": params["publisher"]
26
+ }
27
+
28
+
29
+ print(reference)
30
+ try:
31
+ # Execute the query with the provided values
32
+ await self._exec(query, reference)
33
+ logging.info(
34
+ f"Data inserted successfully: {reference['title']}, {reference['author']}"
35
+ )
36
+ except Exception as e:
37
+ # Log any errors that occur during the database insert operation
38
+ logging.error(f"Failed to insert data: {e}")
39
+ raise # Re-raise the exception to allow further handling if needed
db/update_data.py ADDED
@@ -0,0 +1,35 @@
1
+ import logging
2
+ from db.repository import Repository, get_db_conn
3
+
4
+ # Setup logging (configure as needed)
5
+ logging.basicConfig(level=logging.INFO)
6
+
7
+
8
+ class UpdateDatabase(Repository):
9
+ async def update_record(self, reference):
10
+ print("update record", reference)
11
+ if "id" not in reference:
12
+ raise ValueError("The 'id' parameter is required.")
13
+ query = """
14
+ UPDATE metadata
15
+ SET title = :title,
16
+ category_id = :category_id,
17
+ author = :author,
18
+ year = :year,
19
+ publisher = :publisher
20
+ WHERE id = :id
21
+ """
22
+ print(query)
23
+
24
+ print(reference)
25
+
26
+ try:
27
+ await self._exec(query, reference)
28
+ logging.info(
29
+ f"Record with id {reference['id']} updated successfully."
30
+ )
31
+ except Exception as e:
32
+ logging.error(
33
+ f"Error updating record with id {reference['id']}: {e}"
34
+ )
35
+ raise
docker-compose.yml ADDED
@@ -0,0 +1,8 @@
1
+ version: "3.10"
2
+ services:
3
+ fastapi_app:
4
+ build: .
5
+ ports:
6
+ - "8000:8000"
7
+ env_file:
8
+ - .env
requirements.txt ADDED
@@ -0,0 +1,160 @@
1
+ aiohappyeyeballs==2.4.0
2
+ aiohttp==3.10.5
3
+ aiomysql==0.2.0
4
+ aiosignal==1.3.1
5
+ annotated-types==0.7.0
6
+ anyio==4.4.0
7
+ asgiref==3.8.1
8
+ attrs==24.2.0
9
+ backoff==2.2.1
10
+ bcrypt==4.2.0
11
+ beautifulsoup4==4.12.3
12
+ build==1.2.2
13
+ cachetools==5.5.0
14
+ certifi==2024.8.30
15
+ chardet==5.2.0
16
+ charset-normalizer==3.3.2
17
+ chroma-hnswlib==0.7.6
18
+ chromadb==0.5.7
19
+ click==8.1.7
20
+ coloredlogs==15.0.1
21
+ databases==0.9.0
22
+ dataclasses-json==0.6.7
23
+ Deprecated==1.2.14
24
+ dirtyjson==1.0.8
25
+ distro==1.9.0
26
+ dnspython==1.16.0
27
+ fastapi==0.113.0
28
+ filelock==3.16.1
29
+ flatbuffers==24.3.25
30
+ frozenlist==1.4.1
31
+ fsspec==2024.9.0
32
+ google-auth==2.34.0
33
+ googleapis-common-protos==1.65.0
34
+ greenlet==3.0.3
35
+ grpcio==1.66.1
36
+ h11==0.14.0
37
+ httpcore==1.0.5
38
+ httptools==0.6.1
39
+ httpx==0.27.2
40
+ huggingface-hub==0.25.0
41
+ humanfriendly==10.0
42
+ idna==3.8
43
+ importlib_metadata==8.4.0
44
+ importlib_resources==6.4.5
45
+ Jinja2==3.1.4
46
+ jiter==0.5.0
47
+ joblib==1.4.2
48
+ jsonpatch==1.33
49
+ jsonpointer==3.0.0
50
+ kubernetes==30.1.0
51
+ langchain==0.3.0
52
+ langchain-community==0.3.0
53
+ langchain-core==0.3.1
54
+ langchain-openai==0.2.0
55
+ langchain-text-splitters==0.3.0
56
+ langchainhub==0.1.21
57
+ langfuse==2.48.1
58
+ langsmith==0.1.123
59
+ llama-cloud==0.0.17
60
+ llama-index==0.11.10
61
+ llama-index-agent-openai==0.3.1
62
+ llama-index-callbacks-langfuse==0.2.0
63
+ llama-index-cli==0.3.1
64
+ llama-index-core==0.11.10
65
+ llama-index-embeddings-openai==0.2.4
66
+ llama-index-indices-managed-llama-cloud==0.3.0
67
+ llama-index-legacy==0.9.48.post3
68
+ llama-index-llms-openai==0.2.7
69
+ llama-index-multi-modal-llms-openai==0.2.0
70
+ llama-index-program-openai==0.2.0
71
+ llama-index-question-gen-openai==0.2.0
72
+ llama-index-readers-file==0.2.1
73
+ llama-index-readers-llama-parse==0.3.0
74
+ llama-index-storage-chat-store-redis==0.2.0
75
+ llama-index-vector-stores-pinecone==0.2.1
76
+ llama-parse==0.5.2
77
+ markdown-it-py==3.0.0
78
+ MarkupSafe==2.1.5
79
+ marshmallow==3.22.0
80
+ mdurl==0.1.2
81
+ mmh3==5.0.0
82
+ monotonic==1.6
83
+ mpmath==1.3.0
84
+ multidict==6.0.5
85
+ mypy-extensions==1.0.0
86
+ mysqlclient==2.2.4
87
+ nest_asyncio==1.6.0
88
+ networkx==3.3
89
+ nltk==3.9.1
90
+ numpy==1.26.4
91
+ oauthlib==3.2.2
92
+ onnxruntime==1.19.2
93
+ openai==1.43.1
94
+ opentelemetry-api==1.27.0
95
+ opentelemetry-exporter-otlp-proto-common==1.27.0
96
+ opentelemetry-exporter-otlp-proto-grpc==1.27.0
97
+ opentelemetry-instrumentation==0.48b0
98
+ opentelemetry-instrumentation-asgi==0.48b0
99
+ opentelemetry-instrumentation-fastapi==0.48b0
100
+ opentelemetry-proto==1.27.0
101
+ opentelemetry-sdk==1.27.0
102
+ opentelemetry-semantic-conventions==0.48b0
103
+ opentelemetry-util-http==0.48b0
104
+ orjson==3.10.7
105
+ overrides==7.7.0
106
+ pandas==2.2.2
107
+ pillow==10.4.0
108
+ pinecone-client==5.0.1
109
+ pinecone-plugin-inference==1.0.3
110
+ pinecone-plugin-interface==0.0.7
111
+ posthog==3.6.6
112
+ protobuf==4.25.5
113
+ protoc-gen-openapiv2==0.0.1
114
+ pyasn1==0.6.1
115
+ pyasn1_modules==0.4.1
116
+ pydantic==2.9.0
117
+ pydantic-settings==2.4.0
118
+ pydantic_core==2.23.2
119
+ pymongo==3.11.0
120
+ PyMuPDF==1.24.10
121
+ PyMuPDFb==1.24.10
122
+ PyMySQL==1.1.1
123
+ pypdf==4.3.1
124
+ PyPDF2==3.0.1
125
+ PyPika==0.48.9
126
+ pyproject_hooks==1.1.0
127
+ pyreadline3==3.5.4
128
+ python-dotenv==1.0.1
129
+ python-multipart==0.0.9
130
+ pytz==2024.1
131
+ PyYAML==6.0.2
132
+ redis==5.0.8
133
+ regex==2024.7.24
134
+ requests==2.32.3
135
+ requests-oauthlib==2.0.0
136
+ rich==13.8.1
137
+ rsa==4.9
138
+ shellingham==1.5.4
139
+ sniffio==1.3.1
140
+ soupsieve==2.6
141
+ SQLAlchemy==2.0.34
142
+ sse-starlette==2.1.3
143
+ starlette==0.38.4
144
+ striprtf==0.0.26
145
+ sympy==1.13.3
146
+ tenacity==8.5.0
147
+ tiktoken==0.7.0
148
+ tokenizers==0.20.0
149
+ tqdm==4.66.5
150
+ typer==0.12.5
151
+ types-requests==2.32.0.20240914
152
+ typing-inspect==0.9.0
153
+ tzdata==2024.1
154
+ urllib3==2.2.2
155
+ uvicorn==0.30.6
156
+ watchfiles==0.24.0
157
+ websocket-client==1.8.0
158
+ websockets==13.0.1
159
+ wrapt==1.16.0
160
+ yarl==1.9.11
script/__init__.py ADDED
File without changes
script/document_uploader.py ADDED
@@ -0,0 +1,116 @@
+ from llama_index.core.ingestion import IngestionPipeline
+ from llama_index.core.extractors import PydanticProgramExtractor
+ from llama_index.embeddings.openai import OpenAIEmbedding
+ from config import PINECONE_CONFIG
+ from pinecone.grpc import PineconeGRPC as Pinecone
+ from service.reader import Reader
+ from script.get_metadata import Metadata
+ from fastapi import UploadFile, HTTPException, status
+
+ from llama_index.core.node_parser import (
+     SentenceSplitter,
+     SemanticSplitterNodeParser,
+ )
+
+ # from script.get_topic import extract_topic
+
+ import logging
+ import random
+
+
+ class Uploader:
+     # def __init__(self, reference, file: UploadFile, content_table: UploadFile):
+     def __init__(self, reference, file: UploadFile):
+         self.file = file
+         # self.content_table = content_table
+         self.reader = Reader()
+         self.reference = reference
+         self.metadata = Metadata(reference)
+
+     async def ingest_documents(self, file: UploadFile):
+         """Load documents from the storage path."""
+         documents = await self.reader.read_from_uploadfile(file)
+         print("Number of documents:", len(documents))
+         print("Documents successfully ingested.")
+
+         return documents
+
+     def check_existing_metadata(self, pinecone_index, title, random_vector):
+         try:
+             result = pinecone_index.query(
+                 vector=random_vector,
+                 top_k=1,
+                 filter={
+                     "title": {"$eq": title},
+                 },
+             )
+             return result["matches"]
+         except Exception as e:
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=f"Error checking existing metadata: {str(e)}",
+             )
+
+     async def process_documents(self):
+         # Ingest documents
+         print("test")
+         documents = await self.ingest_documents(self.file)
+         print("documents")
+
+         # topic_extractor = extract_topic(self.reference, self.content_table)
+
+         embed_model = OpenAIEmbedding()
+
+         # Get metadata
+         documents_with_metadata = self.metadata.apply_metadata(documents)
+         print("documents_with_metadata")
+
+         # document_filtered = self.filter_document(documents_with_metadata)
+
+         # Set up the ingestion pipeline
+         pipeline = IngestionPipeline(
+             transformations=[
+                 SemanticSplitterNodeParser(
+                     buffer_size=1,
+                     breakpoint_percentile_threshold=95,
+                     embed_model=embed_model,
+                 ),
+                 # topic_extractor,
+             ]
+         )
+
+         # splitter = SemanticSplitterNodeParser(
+         #     buffer_size=1, breakpoint_percentile_threshold=95, embed_model=embed_model
+         # )
+
+         # Run the pipeline
+         try:
+             nodes_with_metadata = pipeline.run(documents=documents_with_metadata)
+             # nodes_with_metadata = splitter.get_nodes_from_documents(documents_with_metadata)
+             print("Pipeline processing completed.")
+             return nodes_with_metadata
+         except Exception as e:
+             # Log the error and raise HTTPException for FastAPI
+             logging.error(f"An error occurred while running the pipeline: {e}")
+             raise HTTPException(
+                 status_code=500,
+                 detail="An internal server error occurred while running the pipeline.",
+             )
+
+     def filter_document(self, documents):
+         api_key = PINECONE_CONFIG.PINECONE_API_KEY
+         client = Pinecone(api_key=api_key)
+         pinecone_index = client.Index("test")
+
+         random_vector = [random.uniform(0, 1) for _ in range(1536)]
+
+         filtered_documents = []
+         for doc in documents:
+             result = self.check_existing_metadata(
+                 pinecone_index, doc.metadata["title"], random_vector
+             )
+
+             if len(result) == 0:
+                 filtered_documents.append(doc)
+
+         return filtered_documents
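Taken together, `Uploader` reads an uploaded file, stamps every page with reference metadata, and splits it into semantically chunked nodes. A minimal sketch of how it might be driven from a FastAPI route; the route path and the reference payload here are illustrative, not part of this commit:

    from fastapi import FastAPI, UploadFile
    from script.document_uploader import Uploader

    app = FastAPI()

    @app.post("/upload")
    async def upload(file: UploadFile):
        # Illustrative reference dict; the real values would come from the request or database.
        reference = {
            "title": "Example Book",
            "author": "Jane Doe",
            "category": "medicine",
            "year": 2023,
            "publisher": "Example Press",
        }
        uploader = Uploader(reference, file)
        nodes = await uploader.process_documents()
        return {"nodes": len(nodes)}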
script/get_metadata.py ADDED
@@ -0,0 +1,42 @@
+ # Get reference
+
+
+ class Metadata:
+     def __init__(self, reference):
+         self.reference = reference
+
+     def add_metadata(self, documents, metadata):
+         """Add metadata to each document and include page number."""
+         print("Number of documents:", len(documents))
+         for page_number, document in enumerate(documents, start=1):
+             # Ensure the document has a metadata attribute
+             if not hasattr(document, "metadata") or document.metadata is None:
+                 document.metadata = {}
+
+             # Update metadata with page number
+             document.metadata["page"] = page_number
+             document.metadata.update(metadata)
+
+             print(f"Metadata added to page {page_number}")
+             # self.logger.log_action(f"Metadata added to document {document.id_}", action_type="METADATA")
+
+         return documents
+
+     def _generate_metadata(self):
+         """Generate metadata and return it."""
+         metadata = {
+             "title": self.reference["title"],
+             "author": self.reference["author"],
+             "category": self.reference["category"],
+             "year": self.reference["year"],
+             "publisher": self.reference["publisher"],
+             "reference": f"{self.reference['author']}. ({self.reference['year']}). *{self.reference['title']}*. {self.reference['publisher']}.",  # APA-style reference
+         }
+         print("metadata is generated")
+         return metadata
+
+     def apply_metadata(self, documents):
+         """Apply generated metadata to documents."""
+         metadata = self._generate_metadata()
+         print("metadata is applied")
+         return self.add_metadata(documents, metadata)
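`Metadata` simply merges one generated dict into every page's metadata and records the page number. A minimal standalone sketch, using llama-index `Document` objects as stand-ins for the pages produced by the reader (the reference values are illustrative):

    from llama_index.core import Document
    from script.get_metadata import Metadata

    reference = {
        "title": "Example Book",
        "author": "Jane Doe",
        "category": "medicine",
        "year": 2023,
        "publisher": "Example Press",
    }
    pages = [Document(text="page one"), Document(text="page two")]
    pages = Metadata(reference).apply_metadata(pages)
    # Each page now carries its page number plus the shared reference fields.
    print(pages[0].metadata["page"], pages[0].metadata["reference"])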
script/get_topic.py ADDED
@@ -0,0 +1,82 @@
+ import nest_asyncio
+ import os
+ from dotenv import load_dotenv
+ from jinja2 import Template
+ from pydantic import BaseModel, Field
+ from pymongo.mongo_client import MongoClient
+
+ from llama_index.program.openai import OpenAIPydanticProgram
+ from llama_index.core.extractors import PydanticProgramExtractor
+ from llama_index.llms.openai import OpenAI
+
+ from core.prompt import ADD_METADATA_TEMPLATE
+ from core.summarization.summarizer import SummarizeGenerator
+
+ nest_asyncio.apply()
+
+ load_dotenv()
+
+
+ class NodeMetadata(BaseModel):
+     """Metadata for nodes, capturing topic and subtopic from the book."""
+
+     topic: str = Field(
+         ...,
+         description="The main subject or category that the node is associated with, representing a broad theme within the book.",
+     )
+     subtopic: str = Field(
+         ...,
+         description="A more specific aspect or section under the main topic, refining the context of the node within the book.",
+     )
+
+
+ def extract_topic(references, content_table):
+     uri = os.getenv("MONGO_URI")
+     client = MongoClient(uri)
+
+     try:
+         client.admin.command('ping')
+         print("Pinged your deployment. You successfully connected to MongoDB!")
+     except Exception as e:
+         print(e)
+     # Access a specific database
+     db = client["summarizer"]
+
+     # Access a collection within the database
+     collection = db["topic_collection"]
+
+     generate_content_table = SummarizeGenerator(references)
+     extractor_output, extractor_dics = generate_content_table.extract_content_table(content_table)
+     print(extractor_output)
+     data_to_insert = {
+         "title": references["title"],
+         **extractor_dics,  # Unpack the extractor metadata dictionary
+     }
+
+     collection.insert_one(data_to_insert)
+
+
+     add_metadata_template = str(
+         Template(ADD_METADATA_TEMPLATE).render(extractor_output=extractor_output)
+     )
+
+     print("add metadata template:", add_metadata_template)
+
+     llm = OpenAI(temperature=0.1, model="gpt-4o-mini")
+
+     openai_program = OpenAIPydanticProgram.from_defaults(
+         output_cls=NodeMetadata,
+         prompt_template_str="{input}",
+         extract_template_str=add_metadata_template,
+         llm=llm,
+     )
+
+     topic_extractor = PydanticProgramExtractor(
+         program=openai_program,
+         input_key="input",
+         show_progress=True,
+         extract_template_str=add_metadata_template,
+         llm=llm,
+     )
+
+     return topic_extractor
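`extract_topic` returns a `PydanticProgramExtractor`, which is designed to be dropped into the same `IngestionPipeline` that `Uploader` builds (the call is currently commented out there). A hedged sketch of that wiring; `references`, `content_table`, and `documents_with_metadata` are assumed to exist, as they do elsewhere in this commit:

    from llama_index.core.ingestion import IngestionPipeline
    from llama_index.core.node_parser import SentenceSplitter
    from script.get_topic import extract_topic

    # Assumptions: a reference dict and an uploaded table-of-contents file.
    topic_extractor = extract_topic(references, content_table)

    pipeline = IngestionPipeline(
        transformations=[SentenceSplitter(chunk_size=512), topic_extractor]
    )
    # Each resulting node gains "topic" and "subtopic" metadata fields.
    nodes = pipeline.run(documents=documents_with_metadata)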
script/vector_db.py ADDED
@@ -0,0 +1,157 @@
+ from llama_index.core import VectorStoreIndex
+ from llama_index.core import StorageContext
+ from pinecone import Pinecone, ServerlessSpec
+ from llama_index.llms.openai import OpenAI
+ from llama_index.vector_stores.pinecone import PineconeVectorStore
+ from fastapi import HTTPException, status
+ from config import PINECONE_CONFIG
+ from math import ceil
+ import numpy as np
+ import os
+ import json
+
+
+ class IndexManager:
+     def __init__(self, index_name: str = "summarizer-semantic-index"):
+         self.vector_index = None
+         self.index_name = index_name
+         self.client = self._get_pinecone_client()
+         self.pinecone_index = self._create_pinecone_index()
+
+     def _get_pinecone_client(self):
+         """Initialize and return the Pinecone client."""
+         # api_key = os.getenv("PINECONE_API_KEY")
+         api_key = PINECONE_CONFIG.PINECONE_API_KEY
+         if not api_key:
+             raise ValueError(
+                 "Pinecone API key is missing. Please set it in environment variables."
+             )
+         return Pinecone(api_key=api_key)
+
+     def _create_pinecone_index(self):
+         """Create Pinecone index if it doesn't already exist."""
+         if self.index_name not in self.client.list_indexes().names():
+             self.client.create_index(
+                 name=self.index_name,
+                 dimension=1536,
+                 metric="cosine",
+                 spec=ServerlessSpec(cloud="aws", region="us-east-1"),
+             )
+         return self.client.Index(self.index_name)
+
+     def _initialize_vector_store(self) -> StorageContext:
+         """Initialize and return the vector store with the Pinecone index."""
+         vector_store = PineconeVectorStore(pinecone_index=self.pinecone_index)
+         return StorageContext.from_defaults(vector_store=vector_store)
+
+
+     def build_indexes(self, nodes):
+         """Build vector and tree indexes from nodes."""
+         try:
+             storage_context = self._initialize_vector_store()
+             self.vector_index = VectorStoreIndex(nodes, storage_context=storage_context)
+             self.vector_index.set_index_id("vector")
+
+             print(f"Vector Index ID: {self.vector_index.index_id}")
+             print("Vector Index created successfully.")
+
+             return json.dumps({"status": "success", "message": "Vector Index created successfully."})
+
+         except HTTPException as http_exc:
+             raise http_exc  # Re-raise HTTPExceptions to ensure FastAPI handles them
+         except Exception as e:
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=f"Error building indexes: {str(e)}"
+             )
+
+     def get_ids_from_query(self, input_vector, title):
+         print("Searching Pinecone...")
+         print(title)
+
+         new_ids = set()  # Initialize new_ids outside the loop
+
+         while True:
+             results = self.pinecone_index.query(
+                 vector=input_vector,
+                 top_k=10000,
+                 filter={
+                     "title": {"$eq": f"{title}"},
+                 },
+             )
+
+             ids = set()
+             for result in results['matches']:
+                 ids.add(result['id'])
+             # Stop once the query returns no ids we have not already seen
+             if ids.issubset(new_ids):
+                 break
+             else:
+                 new_ids.update(ids)  # Add all new ids to new_ids
+
+         return new_ids
+
+
+     def get_all_ids_from_index(self, title):
+         num_dimensions = 1536
+
+         num_vectors = self.pinecone_index.describe_index_stats(
+         )["total_vector_count"]
+
+         print("Length of ids list is shorter than the number of total vectors...")
+         input_vector = np.random.rand(num_dimensions).tolist()
+         print("creating random vector...")
+         ids = self.get_ids_from_query(input_vector, title)
+         print("getting ids from a vector query...")
+
+         print("updating ids set...")
+         print(f"Collected {len(ids)} ids out of {num_vectors}.")
+
+         return ids
+
+     def delete_vector_database(self, old_reference):
+         try:
+             batch_size = 1000
+             all_ids = self.get_all_ids_from_index(old_reference['title'])
+             all_ids = list(all_ids)
+
+             # Split ids into chunks of batch_size
+             num_batches = ceil(len(all_ids) / batch_size)
+
+             for i in range(num_batches):
+                 # Fetch a batch of IDs
+                 batch_ids = all_ids[i * batch_size: (i + 1) * batch_size]
+                 self.pinecone_index.delete(ids=batch_ids)
+                 print(f"Deleted ids {i * batch_size} to {(i + 1) * batch_size} successfully")
+         except Exception as e:
+             print(e)
+             raise HTTPException(status_code=500, detail="An error occurred while deleting metadata")
+
+     def update_vector_database(self, old_reference, new_reference):
+
+         reference = new_reference.model_dump()
+
+         all_ids = self.get_all_ids_from_index(old_reference['title'])
+         all_ids = list(all_ids)
+
+         for id in all_ids:
+             self.pinecone_index.update(
+                 id=id,
+                 set_metadata=reference
+             )
+
+
+     def load_existing_indexes(self):
+         """Load existing indexes from Pinecone."""
+         try:
+             client = self._get_pinecone_client()
+             pinecone_index = client.Index(self.index_name)
+
+             vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
+             retriever = VectorStoreIndex.from_vector_store(vector_store)
+
+             print("Existing Vector Index loaded successfully.")
+             return retriever
+         except Exception as e:
+             print(f"Error loading existing indexes: {e}")
+             raise
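`IndexManager` owns the Pinecone index lifecycle: creating the index, upserting nodes, and rewriting or deleting vectors when a reference changes. A minimal sketch of the happy path, assuming `nodes_with_metadata` comes from `Uploader.process_documents()` and that OpenAI and Pinecone credentials are configured:

    from script.vector_db import IndexManager

    index_manager = IndexManager()

    # Upsert freshly processed nodes into the "summarizer-semantic-index" index.
    index_manager.build_indexes(nodes_with_metadata)

    # Later, reuse the stored vectors without re-ingesting documents.
    index = index_manager.load_existing_indexes()
    query_engine = index.as_query_engine()
    print(query_engine.query("What does chapter one cover?"))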
service/__init__.py ADDED
File without changes