dsmultimedika committed on
Commit
708e223
1 Parent(s): 420855c

Init application

This view is limited to 50 files because it contains too many changes.
.gitignore ADDED
@@ -0,0 +1,402 @@
1
+ ## Ignore Visual Studio temporary files, build results, and
2
+ ## files generated by popular Visual Studio add-ons.
3
+ ##
4
+ ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
5
+
6
+ # User-specific files
7
+ *.rsuser
8
+ *.suo
9
+ *.user
10
+ *.userosscache
11
+ *.sln.docstates
12
+
13
+ # User-specific files (MonoDevelop/Xamarin Studio)
14
+ *.userprefs
15
+
16
+ # Mono auto generated files
17
+ mono_crash.*
18
+
19
+ # Build results
20
+ [Dd]ebug/
21
+ [Dd]ebugPublic/
22
+ [Rr]elease/
23
+ [Rr]eleases/
24
+ x64/
25
+ x86/
26
+ [Ww][Ii][Nn]32/
27
+ [Aa][Rr][Mm]/
28
+ [Aa][Rr][Mm]64/
29
+ bld/
30
+ [Bb]in/
31
+ [Oo]bj/
32
+ [Ll]og/
33
+ [Ll]ogs/
34
+
35
+ # Visual Studio 2015/2017 cache/options directory
36
+ .vs/
37
+ # Uncomment if you have tasks that create the project's static files in wwwroot
38
+ #wwwroot/
39
+
40
+ # Visual Studio 2017 auto generated files
41
+ Generated\ Files/
42
+
43
+ # MSTest test Results
44
+ [Tt]est[Rr]esult*/
45
+ [Bb]uild[Ll]og.*
46
+
47
+ # NUnit
48
+ *.VisualState.xml
49
+ TestResult.xml
50
+ nunit-*.xml
51
+
52
+ # Build Results of an ATL Project
53
+ [Dd]ebugPS/
54
+ [Rr]eleasePS/
55
+ dlldata.c
56
+
57
+ # Benchmark Results
58
+ BenchmarkDotNet.Artifacts/
59
+
60
+ # .NET Core
61
+ project.lock.json
62
+ project.fragment.lock.json
63
+ artifacts/
64
+
65
+ # ASP.NET Scaffolding
66
+ ScaffoldingReadMe.txt
67
+
68
+ # StyleCop
69
+ StyleCopReport.xml
70
+
71
+ # Files built by Visual Studio
72
+ *_i.c
73
+ *_p.c
74
+ *_h.h
75
+ *.ilk
76
+ *.meta
77
+ *.obj
78
+ *.iobj
79
+ *.pch
80
+ *.pdb
81
+ *.ipdb
82
+ *.pgc
83
+ *.pgd
84
+ *.rsp
85
+ *.sbr
86
+ *.tlb
87
+ *.tli
88
+ *.tlh
89
+ *.tmp
90
+ *.tmp_proj
91
+ *_wpftmp.csproj
92
+ *.log
93
+ *.tlog
94
+ *.vspscc
95
+ *.vssscc
96
+ .builds
97
+ *.pidb
98
+ *.svclog
99
+ *.scc
100
+
101
+ # Chutzpah Test files
102
+ _Chutzpah*
103
+
104
+ # Visual C++ cache files
105
+ ipch/
106
+ *.aps
107
+ *.ncb
108
+ *.opendb
109
+ *.opensdf
110
+ *.sdf
111
+ *.cachefile
112
+ *.VC.db
113
+ *.VC.VC.opendb
114
+
115
+ # Visual Studio profiler
116
+ *.psess
117
+ *.vsp
118
+ *.vspx
119
+ *.sap
120
+
121
+ # Visual Studio Trace Files
122
+ *.e2e
123
+
124
+ # TFS 2012 Local Workspace
125
+ $tf/
126
+
127
+ # Guidance Automation Toolkit
128
+ *.gpState
129
+
130
+ # ReSharper is a .NET coding add-in
131
+ _ReSharper*/
132
+ *.[Rr]e[Ss]harper
133
+ *.DotSettings.user
134
+
135
+ # TeamCity is a build add-in
136
+ _TeamCity*
137
+
138
+ # DotCover is a Code Coverage Tool
139
+ *.dotCover
140
+
141
+ # AxoCover is a Code Coverage Tool
142
+ .axoCover/*
143
+ !.axoCover/settings.json
144
+
145
+ # Coverlet is a free, cross platform Code Coverage Tool
146
+ coverage*.json
147
+ coverage*.xml
148
+ coverage*.info
149
+
150
+ # Visual Studio code coverage results
151
+ *.coverage
152
+ *.coveragexml
153
+
154
+ # NCrunch
155
+ _NCrunch_*
156
+ .*crunch*.local.xml
157
+ nCrunchTemp_*
158
+
159
+ # MightyMoose
160
+ *.mm.*
161
+ AutoTest.Net/
162
+
163
+ # Web workbench (sass)
164
+ .sass-cache/
165
+
166
+ # Installshield output folder
167
+ [Ee]xpress/
168
+
169
+ # DocProject is a documentation generator add-in
170
+ DocProject/buildhelp/
171
+ DocProject/Help/*.HxT
172
+ DocProject/Help/*.HxC
173
+ DocProject/Help/*.hhc
174
+ DocProject/Help/*.hhk
175
+ DocProject/Help/*.hhp
176
+ DocProject/Help/Html2
177
+ DocProject/Help/html
178
+
179
+ # Click-Once directory
180
+ publish/
181
+
182
+ # Publish Web Output
183
+ *.[Pp]ublish.xml
184
+ *.azurePubxml
185
+ # Note: Comment the next line if you want to checkin your web deploy settings,
186
+ # but database connection strings (with potential passwords) will be unencrypted
187
+ *.pubxml
188
+ *.publishproj
189
+
190
+ # Microsoft Azure Web App publish settings. Comment the next line if you want to
191
+ # checkin your Azure Web App publish settings, but sensitive information contained
192
+ # in these scripts will be unencrypted
193
+ PublishScripts/
194
+
195
+ # NuGet Packages
196
+ *.nupkg
197
+ # NuGet Symbol Packages
198
+ *.snupkg
199
+ # The packages folder can be ignored because of Package Restore
200
+ **/[Pp]ackages/*
201
+ # except build/, which is used as an MSBuild target.
202
+ !**/[Pp]ackages/build/
203
+ # Uncomment if necessary however generally it will be regenerated when needed
204
+ #!**/[Pp]ackages/repositories.config
205
+ # NuGet v3's project.json files produces more ignorable files
206
+ *.nuget.props
207
+ *.nuget.targets
208
+
209
+ # Microsoft Azure Build Output
210
+ csx/
211
+ *.build.csdef
212
+
213
+ # Microsoft Azure Emulator
214
+ ecf/
215
+ rcf/
216
+
217
+ # Windows Store app package directories and files
218
+ AppPackages/
219
+ BundleArtifacts/
220
+ Package.StoreAssociation.xml
221
+ _pkginfo.txt
222
+ *.appx
223
+ *.appxbundle
224
+ *.appxupload
225
+
226
+ # Visual Studio cache files
227
+ # files ending in .cache can be ignored
228
+ *.[Cc]ache
229
+ # but keep track of directories ending in .cache
230
+ !?*.[Cc]ache/
231
+
232
+ # Others
233
+ ClientBin/
234
+ ~$*
235
+ *~
236
+ *.dbmdl
237
+ *.dbproj.schemaview
238
+ *.jfm
239
+ *.pfx
240
+ *.publishsettings
241
+ orleans.codegen.cs
242
+
243
+ # Including strong name files can present a security risk
244
+ # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245
+ #*.snk
246
+
247
+ # Since there are multiple workflows, uncomment next line to ignore bower_components
248
+ # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249
+ #bower_components/
250
+
251
+ # RIA/Silverlight projects
252
+ Generated_Code/
253
+
254
+ # Backup & report files from converting an old project file
255
+ # to a newer Visual Studio version. Backup files are not needed,
256
+ # because we have git ;-)
257
+ _UpgradeReport_Files/
258
+ Backup*/
259
+ UpgradeLog*.XML
260
+ UpgradeLog*.htm
261
+ ServiceFabricBackup/
262
+ *.rptproj.bak
263
+
264
+ # SQL Server files
265
+ *.mdf
266
+ *.ldf
267
+ *.ndf
268
+
269
+ # Business Intelligence projects
270
+ *.rdl.data
271
+ *.bim.layout
272
+ *.bim_*.settings
273
+ *.rptproj.rsuser
274
+ *- [Bb]ackup.rdl
275
+ *- [Bb]ackup ([0-9]).rdl
276
+ *- [Bb]ackup ([0-9][0-9]).rdl
277
+
278
+ # Microsoft Fakes
279
+ FakesAssemblies/
280
+
281
+ # GhostDoc plugin setting file
282
+ *.GhostDoc.xml
283
+
284
+ # Node.js Tools for Visual Studio
285
+ .ntvs_analysis.dat
286
+ node_modules/
287
+
288
+ # Visual Studio 6 build log
289
+ *.plg
290
+
291
+ # Visual Studio 6 workspace options file
292
+ *.opt
293
+
294
+ # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295
+ *.vbw
296
+
297
+ # Visual Studio 6 auto-generated project file (contains which files were open etc.)
298
+ *.vbp
299
+
300
+ # Visual Studio 6 workspace and project file (working project files containing files to include in project)
301
+ *.dsw
302
+ *.dsp
303
+
304
+ # Visual Studio 6 technical files
305
+ *.ncb
306
+ *.aps
307
+
308
+ # Visual Studio LightSwitch build output
309
+ **/*.HTMLClient/GeneratedArtifacts
310
+ **/*.DesktopClient/GeneratedArtifacts
311
+ **/*.DesktopClient/ModelManifest.xml
312
+ **/*.Server/GeneratedArtifacts
313
+ **/*.Server/ModelManifest.xml
314
+ _Pvt_Extensions
315
+
316
+ # Paket dependency manager
317
+ .paket/paket.exe
318
+ paket-files/
319
+
320
+ # FAKE - F# Make
321
+ .fake/
322
+
323
+ # CodeRush personal settings
324
+ .cr/personal
325
+
326
+ # Python Tools for Visual Studio (PTVS)
327
+ __pycache__/
328
+ *.pyc
329
+
330
+ # Cake - Uncomment if you are using it
331
+ # tools/**
332
+ # !tools/packages.config
333
+
334
+ # Tabs Studio
335
+ *.tss
336
+
337
+ # Telerik's JustMock configuration file
338
+ *.jmconfig
339
+
340
+ # BizTalk build output
341
+ *.btp.cs
342
+ *.btm.cs
343
+ *.odx.cs
344
+ *.xsd.cs
345
+
346
+ # OpenCover UI analysis results
347
+ OpenCover/
348
+
349
+ # Azure Stream Analytics local run output
350
+ ASALocalRun/
351
+
352
+ # MSBuild Binary and Structured Log
353
+ *.binlog
354
+
355
+ # NVidia Nsight GPU debugger configuration file
356
+ *.nvuser
357
+
358
+ # MFractors (Xamarin productivity tool) working folder
359
+ .mfractor/
360
+
361
+ # Local History for Visual Studio
362
+ .localhistory/
363
+
364
+ # Visual Studio History (VSHistory) files
365
+ .vshistory/
366
+
367
+ # BeatPulse healthcheck temp database
368
+ healthchecksdb
369
+
370
+ # Backup folder for Package Reference Convert tool in Visual Studio 2017
371
+ MigrationBackup/
372
+
373
+ # Ionide (cross platform F# VS Code tools) working folder
374
+ .ionide/
375
+
376
+ # Fody - auto-generated XML schema
377
+ FodyWeavers.xsd
378
+
379
+ # VS Code files for those working on multiple tools
380
+ .vscode/*
381
+ !.vscode/settings.json
382
+ !.vscode/tasks.json
383
+ !.vscode/launch.json
384
+ !.vscode/extensions.json
385
+ *.code-workspace
386
+
387
+ # Local History for Visual Studio Code
388
+ .history/
389
+
390
+ # Windows Installer files from build outputs
391
+ *.cab
392
+ *.msi
393
+ *.msix
394
+ *.msm
395
+ *.msp
396
+
397
+ # JetBrains Rider
398
+ *.sln.iml
399
+
400
+ .env
401
+ *.pem
402
+ *.base64
Dockerfile ADDED
@@ -0,0 +1,14 @@
1
+ # Use the official Python image from Docker Hub
2
+ FROM python:3.11
3
+
4
+ RUN useradd -m -u 1000 user
5
+ USER user
6
+ ENV PATH="/home/user/.local/bin:$PATH"
7
+
8
+ WORKDIR /app
9
+
10
+ COPY --chown=user ./requirements.txt requirements.txt
11
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
12
+
13
+ COPY --chown=user . /app
14
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
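The CMD line assumes a top-level app module exposing a FastAPI instance named app; that file is not visible in this 50-file view, so the following is only a minimal sketch of such an entry point, wired to the routers and event handlers added later in this commit (names and title are illustrative):

# app.py (hypothetical entry point implied by the Dockerfile's CMD "uvicorn app:app"; sketch only)
from fastapi import FastAPI

from api.events import register_events           # startup/shutdown hooks added below
from api.router import bot, health, role, topic  # routers added below

app = FastAPI(title="RAG API")  # title is an assumption

for module in (bot, health, role, topic):
    app.include_router(module.router)

register_events(app)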
alembic.ini ADDED
@@ -0,0 +1,117 @@
1
+ # A generic, single database configuration.
2
+
3
+ [alembic]
4
+ # path to migration scripts
5
+ # Use forward slashes (/) also on windows to provide an os agnostic path
6
+ script_location = alembic
7
+
8
+ # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
9
+ # Uncomment the line below if you want the files to be prepended with date and time
10
+ # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
11
+ # for all available tokens
12
+ # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
13
+
14
+ # sys.path path, will be prepended to sys.path if present.
15
+ # defaults to the current working directory.
16
+ prepend_sys_path = .
17
+
18
+ # timezone to use when rendering the date within the migration file
19
+ # as well as the filename.
20
+ # If specified, requires the python>=3.9 or backports.zoneinfo library.
21
+ # Any required deps can installed by adding `alembic[tz]` to the pip requirements
22
+ # string value is passed to ZoneInfo()
23
+ # leave blank for localtime
24
+ # timezone =
25
+
26
+ # max length of characters to apply to the "slug" field
27
+ # truncate_slug_length = 40
28
+
29
+ # set to 'true' to run the environment during
30
+ # the 'revision' command, regardless of autogenerate
31
+ # revision_environment = false
32
+
33
+ # set to 'true' to allow .pyc and .pyo files without
34
+ # a source .py file to be detected as revisions in the
35
+ # versions/ directory
36
+ # sourceless = false
37
+
38
+ # version location specification; This defaults
39
+ # to alembic/versions. When using multiple version
40
+ # directories, initial revisions must be specified with --version-path.
41
+ # The path separator used here should be the separator specified by "version_path_separator" below.
42
+ # version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
43
+
44
+ # version path separator; As mentioned above, this is the character used to split
45
+ # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
46
+ # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
47
+ # Valid values for version_path_separator are:
48
+ #
49
+ # version_path_separator = :
50
+ # version_path_separator = ;
51
+ # version_path_separator = space
52
+ # version_path_separator = newline
53
+ version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
54
+
55
+ # set to 'true' to search source files recursively
56
+ # in each "version_locations" directory
57
+ # new in Alembic version 1.10
58
+ # recursive_version_locations = false
59
+
60
+ # the output encoding used when revision files
61
+ # are written from script.py.mako
62
+ # output_encoding = utf-8
63
+
64
+ sqlalchemy.url = placeholder_url
65
+
66
+
67
+ [post_write_hooks]
68
+ # post_write_hooks defines scripts or Python functions that are run
69
+ # on newly generated revision scripts. See the documentation for further
70
+ # detail and examples
71
+
72
+ # format using "black" - use the console_scripts runner, against the "black" entrypoint
73
+ # hooks = black
74
+ # black.type = console_scripts
75
+ # black.entrypoint = black
76
+ # black.options = -l 79 REVISION_SCRIPT_FILENAME
77
+
78
+ # lint with attempts to fix using "ruff" - use the exec runner, execute a binary
79
+ # hooks = ruff
80
+ # ruff.type = exec
81
+ # ruff.executable = %(here)s/.venv/bin/ruff
82
+ # ruff.options = --fix REVISION_SCRIPT_FILENAME
83
+
84
+ # Logging configuration
85
+ [loggers]
86
+ keys = root,sqlalchemy,alembic
87
+
88
+ [handlers]
89
+ keys = console
90
+
91
+ [formatters]
92
+ keys = generic
93
+
94
+ [logger_root]
95
+ level = WARN
96
+ handlers = console
97
+ qualname =
98
+
99
+ [logger_sqlalchemy]
100
+ level = WARN
101
+ handlers =
102
+ qualname = sqlalchemy.engine
103
+
104
+ [logger_alembic]
105
+ level = INFO
106
+ handlers =
107
+ qualname = alembic
108
+
109
+ [handler_console]
110
+ class = StreamHandler
111
+ args = (sys.stderr,)
112
+ level = NOTSET
113
+ formatter = generic
114
+
115
+ [formatter_generic]
116
+ format = %(levelname)-5.5s [%(name)s] %(message)s
117
+ datefmt = %H:%M:%S
alembic/README ADDED
@@ -0,0 +1 @@
1
+ Generic single-database configuration.
alembic/env.py ADDED
@@ -0,0 +1,86 @@
1
+ from logging.config import fileConfig
2
+
3
+ from sqlalchemy import engine_from_config
4
+ from sqlalchemy import pool
5
+ from dotenv import load_dotenv
6
+ from alembic import context
7
+ from db.models import Base
8
+
9
+ import os
10
+
11
+ load_dotenv()
12
+
13
+ # this is the Alembic Config object, which provides
14
+ # access to the values within the .ini file in use.
15
+ config = context.config
16
+
17
+ # Interpret the config file for Python logging.
18
+ # This line sets up loggers basically.
19
+ if config.config_file_name is not None:
20
+ fileConfig(config.config_file_name)
21
+
22
+ config.set_main_option(
23
+ "sqlalchemy.url", os.getenv("DB_URI_SQL_ALCHEMY")
24
+ )
25
+ # add your model's MetaData object here
26
+ # for 'autogenerate' support
27
+ # from myapp import mymodel
28
+ # target_metadata = mymodel.Base.metadata
29
+ target_metadata = Base.metadata
30
+
31
+ # other values from the config, defined by the needs of env.py,
32
+ # can be acquired:
33
+ # my_important_option = config.get_main_option("my_important_option")
34
+ # ... etc.
35
+
36
+
37
+ def run_migrations_offline() -> None:
38
+ """Run migrations in 'offline' mode.
39
+
40
+ This configures the context with just a URL
41
+ and not an Engine, though an Engine is acceptable
42
+ here as well. By skipping the Engine creation
43
+ we don't even need a DBAPI to be available.
44
+
45
+ Calls to context.execute() here emit the given string to the
46
+ script output.
47
+
48
+ """
49
+ url = config.get_main_option("sqlalchemy.url")
50
+ context.configure(
51
+ url=url,
52
+ target_metadata=target_metadata,
53
+ literal_binds=True,
54
+ dialect_opts={"paramstyle": "named"},
55
+ )
56
+
57
+ with context.begin_transaction():
58
+ context.run_migrations()
59
+
60
+
61
+ def run_migrations_online() -> None:
62
+ """Run migrations in 'online' mode.
63
+
64
+ In this scenario we need to create an Engine
65
+ and associate a connection with the context.
66
+
67
+ """
68
+ connectable = engine_from_config(
69
+ config.get_section(config.config_ini_section, {}),
70
+ prefix="sqlalchemy.",
71
+ poolclass=pool.NullPool,
72
+ )
73
+
74
+ with connectable.connect() as connection:
75
+ context.configure(
76
+ connection=connection, target_metadata=target_metadata
77
+ )
78
+
79
+ with context.begin_transaction():
80
+ context.run_migrations()
81
+
82
+
83
+ if context.is_offline_mode():
84
+ run_migrations_offline()
85
+ else:
86
+ run_migrations_online()
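env.py ignores the placeholder sqlalchemy.url in alembic.ini and injects the real connection string from the DB_URI_SQL_ALCHEMY environment variable at run time. A small sketch, assuming that variable is set (for example via .env), of driving the same migrations from Python instead of the alembic CLI:

# migrate.py (sketch; not part of this commit)
from alembic import command
from alembic.config import Config

def upgrade_to_head(ini_path: str = "alembic.ini") -> None:
    cfg = Config(ini_path)        # picks up script_location and logging settings
    command.upgrade(cfg, "head")  # invokes env.py, which reads DB_URI_SQL_ALCHEMY

if __name__ == "__main__":
    upgrade_to_head()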
alembic/script.py.mako ADDED
@@ -0,0 +1,26 @@
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ ${imports if imports else ""}
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = ${repr(up_revision)}
16
+ down_revision: Union[str, None] = ${repr(down_revision)}
17
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
18
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
19
+
20
+
21
+ def upgrade() -> None:
22
+ ${upgrades if upgrades else "pass"}
23
+
24
+
25
+ def downgrade() -> None:
26
+ ${downgrades if downgrades else "pass"}
alembic/versions/404f8a028e0e_add_bot_name.py ADDED
@@ -0,0 +1,30 @@
1
+ """add bot_name
2
+
3
+ Revision ID: 404f8a028e0e
4
+ Revises: 98b1b4a0de39
5
+ Create Date: 2024-10-04 14:03:44.098762
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '404f8a028e0e'
16
+ down_revision: Union[str, None] = '98b1b4a0de39'
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.add_column('bot', sa.Column('bot_name', sa.String(length=200), nullable=False))
24
+ # ### end Alembic commands ###
25
+
26
+
27
+ def downgrade() -> None:
28
+ # ### commands auto generated by Alembic - please adjust! ###
29
+ op.drop_column('bot', 'bot_name')
30
+ # ### end Alembic commands ###
alembic/versions/426e52aa13aa_migration_description.py ADDED
@@ -0,0 +1,91 @@
1
+ """migration description
2
+
3
+ Revision ID: 426e52aa13aa
4
+ Revises:
5
+ Create Date: 2024-10-02 14:32:47.859996
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import mysql
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '426e52aa13aa'
16
+ down_revision: Union[str, None] = None
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.drop_table('Category')
24
+ op.drop_table('Role')
25
+ op.drop_table('Metadata')
26
+ op.drop_table('_prisma_migrations')
27
+ op.drop_index('email', table_name='User')
28
+ op.drop_table('User')
29
+ op.add_column('bot', sa.Column('user_id', sa.Integer(), nullable=True))
30
+ op.add_column('metadata', sa.Column('title', sa.String(length=100), nullable=True))
31
+ op.create_foreign_key(None, 'bot', 'user', ['user_id'], ['id'])
32
+ op.alter_column('session', 'id',
33
+ existing_type=mysql.CHAR(length=36),
34
+ type_=sa.String(length=36),
35
+ existing_nullable=False)
36
+ # ### end Alembic commands ###
37
+
38
+
39
+ def downgrade() -> None:
40
+ # ### commands auto generated by Alembic - please adjust! ###
41
+ op.alter_column('session', 'id',
42
+ existing_type=sa.String(length=36),
43
+ type_=mysql.CHAR(length=36),
44
+ existing_nullable=False)
45
+ op.drop_constraint(None, 'bot', type_='foreignkey')
46
+ op.drop_column('bot', 'user_id')
47
+ op.create_table('User',
48
+ sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False),
49
+ sa.Column('name', mysql.VARCHAR(length=50), nullable=False),
50
+ sa.Column('email', mysql.VARCHAR(length=100), nullable=False),
51
+ sa.Column('password_hash', mysql.VARCHAR(length=100), nullable=False),
52
+ sa.Column('created_at', mysql.DATETIME(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
53
+ sa.Column('updated_at', mysql.DATETIME(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
54
+ sa.PrimaryKeyConstraint('id')
55
+ )
56
+ op.create_index('email', 'User', ['email'], unique=True)
57
+ op.create_table('_prisma_migrations',
58
+ sa.Column('id', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=36), nullable=False),
59
+ sa.Column('checksum', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=64), nullable=False),
60
+ sa.Column('finished_at', mysql.DATETIME(fsp=3), nullable=True),
61
+ sa.Column('migration_name', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=255), nullable=False),
62
+ sa.Column('logs', mysql.TEXT(collation='utf8mb4_unicode_ci'), nullable=True),
63
+ sa.Column('rolled_back_at', mysql.DATETIME(fsp=3), nullable=True),
64
+ sa.Column('started_at', mysql.DATETIME(fsp=3), server_default=sa.text('CURRENT_TIMESTAMP(3)'), nullable=False),
65
+ sa.Column('applied_steps_count', mysql.INTEGER(unsigned=True), server_default=sa.text("'0'"), autoincrement=False, nullable=False),
66
+ sa.PrimaryKeyConstraint('id')
67
+ )
68
+ op.create_table('Metadata',
69
+ sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False),
70
+ sa.Column('title', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=191), nullable=False),
71
+ sa.Column('category', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=191), nullable=False),
72
+ sa.Column('author', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=191), nullable=False),
73
+ sa.Column('year', mysql.INTEGER(), autoincrement=False, nullable=False),
74
+ sa.Column('publisher', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=191), nullable=False),
75
+ sa.Column('createdAt', mysql.DATETIME(fsp=3), server_default=sa.text('CURRENT_TIMESTAMP(3)'), nullable=False),
76
+ sa.Column('updatedAt', mysql.DATETIME(fsp=3), nullable=False),
77
+ sa.PrimaryKeyConstraint('id')
78
+ )
79
+ op.create_table('Role',
80
+ sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False),
81
+ sa.Column('role_name', mysql.VARCHAR(length=100), nullable=False),
82
+ sa.Column('description', mysql.VARCHAR(length=100), nullable=True),
83
+ sa.PrimaryKeyConstraint('id')
84
+ )
85
+ op.create_table('Category',
86
+ sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False),
87
+ sa.Column('category', mysql.VARCHAR(length=100), nullable=True),
88
+ sa.Column('created_at', mysql.DATETIME(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
89
+ sa.PrimaryKeyConstraint('id')
90
+ )
91
+ # ### end Alembic commands ###
alembic/versions/98b1b4a0de39_add_username.py ADDED
@@ -0,0 +1,32 @@
1
+ """add username
2
+
3
+ Revision ID: 98b1b4a0de39
4
+ Revises: c818e5b84075
5
+ Create Date: 2024-10-03 14:27:43.877725
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '98b1b4a0de39'
16
+ down_revision: Union[str, None] = 'c818e5b84075'
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.add_column('user', sa.Column('username', sa.String(length=100), nullable=False))
24
+ op.create_unique_constraint(None, 'user', ['username'])
25
+ # ### end Alembic commands ###
26
+
27
+
28
+ def downgrade() -> None:
29
+ # ### commands auto generated by Alembic - please adjust! ###
30
+ op.drop_constraint(None, 'user', type_='unique')
31
+ op.drop_column('user', 'username')
32
+ # ### end Alembic commands ###
alembic/versions/c818e5b84075_add_role_id.py ADDED
@@ -0,0 +1,108 @@
1
+ """add role id
2
+
3
+ Revision ID: c818e5b84075
4
+ Revises: 426e52aa13aa
5
+ Create Date: 2024-10-03 09:35:53.882054
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import mysql
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = 'c818e5b84075'
16
+ down_revision: Union[str, None] = '426e52aa13aa'
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.drop_table('_prisma_migrations')
24
+ op.alter_column('category', 'category',
25
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
26
+ type_=sa.String(length=200),
27
+ existing_nullable=True)
28
+ op.alter_column('feedback', 'comment',
29
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
30
+ type_=sa.String(length=1000),
31
+ existing_nullable=True)
32
+ op.alter_column('message', 'goal',
33
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
34
+ type_=sa.String(length=200),
35
+ existing_nullable=True)
36
+ op.alter_column('metadata', 'title',
37
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
38
+ type_=sa.String(length=200),
39
+ existing_nullable=True)
40
+ op.alter_column('metadata', 'author',
41
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
42
+ type_=sa.String(length=200),
43
+ existing_nullable=True)
44
+ op.alter_column('role', 'role_name',
45
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
46
+ type_=sa.String(length=200),
47
+ existing_nullable=False)
48
+ op.alter_column('role', 'description',
49
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
50
+ type_=sa.String(length=200),
51
+ existing_nullable=True)
52
+ op.add_column('user', sa.Column('role_id', sa.Integer(), nullable=True))
53
+ op.alter_column('user', 'name',
54
+ existing_type=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=50),
55
+ type_=sa.String(length=100),
56
+ existing_nullable=False)
57
+ op.create_foreign_key(None, 'user', 'role', ['role_id'], ['id'])
58
+ # ### end Alembic commands ###
59
+
60
+
61
+ def downgrade() -> None:
62
+ # ### commands auto generated by Alembic - please adjust! ###
63
+ op.drop_constraint(None, 'user', type_='foreignkey')
64
+ op.alter_column('user', 'name',
65
+ existing_type=sa.String(length=100),
66
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=50),
67
+ existing_nullable=False)
68
+ op.drop_column('user', 'role_id')
69
+ op.alter_column('role', 'description',
70
+ existing_type=sa.String(length=200),
71
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
72
+ existing_nullable=True)
73
+ op.alter_column('role', 'role_name',
74
+ existing_type=sa.String(length=200),
75
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
76
+ existing_nullable=False)
77
+ op.alter_column('metadata', 'author',
78
+ existing_type=sa.String(length=200),
79
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
80
+ existing_nullable=True)
81
+ op.alter_column('metadata', 'title',
82
+ existing_type=sa.String(length=200),
83
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
84
+ existing_nullable=True)
85
+ op.alter_column('message', 'goal',
86
+ existing_type=sa.String(length=200),
87
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
88
+ existing_nullable=True)
89
+ op.alter_column('feedback', 'comment',
90
+ existing_type=sa.String(length=1000),
91
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
92
+ existing_nullable=True)
93
+ op.alter_column('category', 'category',
94
+ existing_type=sa.String(length=200),
95
+ type_=mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=100),
96
+ existing_nullable=True)
97
+ op.create_table('_prisma_migrations',
98
+ sa.Column('id', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=36), nullable=False),
99
+ sa.Column('checksum', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=64), nullable=False),
100
+ sa.Column('finished_at', mysql.DATETIME(fsp=3), nullable=True),
101
+ sa.Column('migration_name', mysql.VARCHAR(collation='utf8mb4_unicode_ci', length=255), nullable=False),
102
+ sa.Column('logs', mysql.TEXT(collation='utf8mb4_unicode_ci'), nullable=True),
103
+ sa.Column('rolled_back_at', mysql.DATETIME(fsp=3), nullable=True),
104
+ sa.Column('started_at', mysql.DATETIME(fsp=3), server_default=sa.text('CURRENT_TIMESTAMP(3)'), nullable=False),
105
+ sa.Column('applied_steps_count', mysql.INTEGER(unsigned=True), server_default=sa.text("'0'"), autoincrement=False, nullable=False),
106
+ sa.PrimaryKeyConstraint('id')
107
+ )
108
+ # ### end Alembic commands ###
api/__init__.py ADDED
File without changes
api/auth.py ADDED
@@ -0,0 +1,68 @@
1
+ from fastapi import APIRouter, Depends, HTTPException, Request
2
+ from fastapi.responses import RedirectResponse
3
+ from pydantic import BaseModel
4
+ from sqlalchemy.orm import Session
5
+ from db.models import User
6
+ from starlette import status
7
+ from datetime import timedelta, datetime, timezone
8
+ from db.database import get_db
9
+ from passlib.context import CryptContext
10
+ from typing import Annotated
11
+ from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
12
+ from jose import jwt, JWTError
13
+
14
+
15
+ # Define OAuth2Password flow for Swagger
16
+
17
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="login")
18
+
19
+ # Custom OAuth2 request form to accept email, username, password, and role_id
20
+
21
+ router = APIRouter(prefix="/auth", tags=["auth"])
22
+
23
+ SECRET_KEY = "557673909e7ad0c6c702e18550f6432754a55205d209c4c9fb27966bfb844555"
24
+ ALGORITHM = "HS256"
25
+
26
+ bcrypt_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
27
+
28
+ # Database dependency
29
+ db_dependency = Annotated[Session, Depends(get_db)]
30
+
31
+
32
+ def authenticate_user(email: str, password: str, db):
33
+ user = db.query(User).filter(User.email == email).first()
34
+ if not user:
35
+ return False
36
+
37
+ if not bcrypt_context.verify(password, user.hashed_password):
38
+ return False
39
+ return user
40
+
41
+ def create_access_token(
42
+ username: str, user_id: int, role_id: int, expires_delta: timedelta, email: str
43
+ ):
44
+ encode = {"sub": username, "id": user_id, "role_id": role_id, "email": email}
45
+ expires = datetime.now(timezone.utc) + expires_delta
46
+ encode.update({"exp": expires})
47
+ return jwt.encode(encode, SECRET_KEY, algorithm=ALGORITHM)
48
+
49
+ async def get_current_user(token: Annotated[str, Depends(oauth2_scheme)]):
50
+ try:
51
+ payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
52
+ username: str = payload.get('sub')
53
+ user_id: int = payload.get('id')
54
+ role_id: int = payload.get('role_id')
55
+ email: str = payload.get('email')
56
+
57
+ if username is None or user_id is None:
58
+ raise HTTPException(
59
+ status_code=status.HTTP_401_UNAUTHORIZED,
60
+ detail="Could not validate user.",
61
+ )
62
+
63
+ return {"username": username, "id" : user_id, "role_id":role_id, "email":email}
64
+
65
+ except JWTError:
66
+ raise HTTPException(
67
+ status_code=status.HTTP_401_UNAUTHORIZED, detail="Could not validate user."
68
+ )
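create_access_token packs the username, user id, role id, and email into the JWT payload, and get_current_user expects exactly those claims back. A short round-trip sketch against the helpers above (the credentials are illustrative; run with the api package importable):

# sketch; not part of this commit
from datetime import timedelta
from jose import jwt
from api.auth import ALGORITHM, SECRET_KEY, create_access_token

token = create_access_token(
    username="alice", user_id=1, role_id=1,
    expires_delta=timedelta(minutes=20), email="alice@example.com",
)
claims = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
print(claims["sub"], claims["id"], claims["role_id"], claims["email"])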
api/events.py ADDED
@@ -0,0 +1,30 @@
1
+ from fastapi import FastAPI
2
+ from db.models import Base
3
+ from db.database import engine
4
+ from api.router.topic import db_conn
5
+ from llama_index.core import set_global_handler
6
+ from dotenv import load_dotenv
7
+ import os
8
+
9
+
10
+ load_dotenv()
11
+
12
+
13
+ async def startup() -> None:
14
+ Base.metadata.create_all(engine)
15
+ print("table added")
16
+ await db_conn.connect()
17
+ os.environ["LANGFUSE_SECRET_KEY"] = os.getenv("LANGFUSE_SECRET_KEY")
18
+ os.environ["LANGFUSE_PUBLIC_KEY"] = os.getenv("LANGFUSE_PUBLIC_KEY")
19
+ os.environ["LANGFUSE_HOST"] = os.getenv("LANGFUSE_HOST")
20
+ set_global_handler("langfuse")
21
+
22
+
23
+ async def shutdown() -> None:
24
+ # await db_conn.disconnect()
25
+ pass
26
+
27
+ def register_events(app: FastAPI) -> FastAPI:
28
+ app.add_event_handler("startup", startup)
29
+ app.add_event_handler("shutdown", shutdown)
30
+ return app
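startup() assumes the Langfuse credentials are already present in the process environment (loaded here via load_dotenv), and alembic/env.py additionally needs DB_URI_SQL_ALCHEMY. A small fail-fast sketch for checking those assumptions before the app boots; the variable list is inferred from this commit, not exhaustive:

# sketch; not part of this commit
import os

REQUIRED_VARS = (
    "LANGFUSE_SECRET_KEY",
    "LANGFUSE_PUBLIC_KEY",
    "LANGFUSE_HOST",
    "DB_URI_SQL_ALCHEMY",
)

def check_env() -> None:
    missing = [name for name in REQUIRED_VARS if not os.getenv(name)]
    if missing:
        raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")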
api/function.py ADDED
@@ -0,0 +1,249 @@
1
+ from script.vector_db import IndexManager
2
+ from script.document_uploader import Uploader
3
+ from db.save_data import InsertDatabase
4
+ from db.get_data import GetDatabase
5
+ from db.delete_data import DeleteDatabase
6
+ from db.update_data import UpdateDatabase
7
+
8
+ from typing import Any
9
+ from fastapi import UploadFile
10
+ from fastapi import HTTPException
11
+
12
+ from service.dto import ChatMessage
13
+ from core.chat.engine import Engine
14
+ from core.chat.chatstore import ChatStore
15
+ from core.parser import clean_text, update_response, renumber_sources, seperate_to_list
16
+ from llama_index.core.llms import MessageRole
17
+ from service.dto import BotResponseStreaming
18
+ from service.aws_loader import Loader
19
+
20
+
21
+ from typing import List
22
+ import logging
23
+ import re
24
+
25
+
26
+ # Configure logging
27
+ logging.basicConfig(level=logging.INFO)
28
+
29
+
30
+ async def data_ingestion(db_conn, category_id, reference, file: UploadFile) -> Any:
31
+ try:
32
+
33
+ # insert_database = InsertDatabase(db_conn)
34
+
35
+ # Insert data into the database
36
+ # await insert_database.insert_data(reference, category_id)
37
+
38
+ # Upload to AWS
39
+ file_name = f"{reference['title']}"
40
+ aws_loader = Loader()
41
+
42
+ file_obj = file
43
+ aws_loader.upload_to_s3(file_obj, file_name)
44
+
45
+ uploader = Uploader(reference, file)
46
+ print("uploader : ", uploader)
47
+
48
+ nodes_with_metadata = await uploader.process_documents()
49
+
50
+ # response = json.dumps({"status": "success", "message": "Vector Index loaded successfully."})
51
+
52
+ # Build indexes using IndexManager
53
+ index = IndexManager()
54
+ response = index.build_indexes(nodes_with_metadata)
55
+
56
+ return response
57
+
58
+ except Exception as e:
59
+ # Log the error and raise HTTPException for FastAPI
60
+ logging.error(f"An error occurred in data ingestion: {e}")
61
+ raise HTTPException(
62
+ status_code=500,
63
+ detail="An internal server error occurred in data ingestion.",
64
+ )
65
+
66
+
67
+ async def get_data(db_conn, title="", fetch_all_data=True):
68
+ get_database = GetDatabase(db_conn)
69
+ print(get_database)
70
+ try:
71
+ if fetch_all_data:
72
+ results = await get_database.get_all_data()
73
+ print(results)
74
+ logging.info("Database fetched all data")
75
+ return results
76
+ else:
77
+ results = await get_database.get_data(title)
78
+ logging.info("Database fetched one data")
79
+ return results
80
+
81
+ except Exception as e:
82
+ # Log the error and raise HTTPException for FastAPI
83
+ logging.error(f"An error occurred in get data: {e}")
84
+ raise HTTPException(
85
+ status_code=500, detail="An internal server error occurred in get data."
86
+ )
87
+
88
+
89
+ async def update_data(id: int, reference, db_conn):
90
+ update_database = UpdateDatabase(db_conn)
91
+ try:
92
+ reference = reference.model_dump()
93
+ print(reference)
94
+ reference.update({"id": id})
95
+ print(reference)
96
+ await update_database.update_record(reference)
97
+ response = {"status": "Update Success"}
98
+ return response
99
+ except Exception as e:
100
+ # Log the error and raise HTTPException for FastAPI
101
+ logging.error(f"An error occurred in update data: {e}")
102
+ raise HTTPException(
103
+ status_code=500, detail="An internal server error occurred in update data."
104
+ )
105
+
106
+
107
+ async def delete_data(id: int, db_conn):
108
+ delete_database = DeleteDatabase(db_conn)
109
+ try:
110
+ params = {"id": id}
111
+ await delete_database.delete_record(params)
112
+ response = {"status": "Delete Success"}
113
+ return response
114
+ except Exception as e:
115
+ # Log the error and raise HTTPException for FastAPI
116
+ logging.error(f"An error occurred in delete data: {e}")
117
+ raise HTTPException(
118
+ status_code=500, detail="An internal server error occurred in delete data."
119
+ )
120
+
121
+
122
+ def generate_completion_non_streaming(
123
+ session_id, user_request, titles: List = None, type="general"
124
+ ):
125
+ try:
126
+ engine = Engine()
127
+ index_manager = IndexManager()
128
+ chatstore = ChatStore()
129
+
130
+ # Load existing indexes
131
+ index = index_manager.load_existing_indexes()
132
+
133
+ if type == "general":
134
+ # Retrieve the chat engine with the loaded index
135
+ chat_engine = engine.get_chat_engine(session_id, index)
136
+ else:
137
+ # Retrieve the chat engine with the loaded index
138
+ chat_engine = engine.get_chat_engine(session_id, index, titles, type)
139
+
140
+ # Generate completion response
141
+ response = chat_engine.chat(user_request)
142
+
143
+ sources = response.sources
144
+
145
+ number_reference = list(set(re.findall(r"\[(\d+)\]", str(response))))
146
+ number_reference_sorted = sorted(number_reference)
147
+
148
+ contents = []
149
+ metadata_collection = []
150
+ scores = []
151
+
152
+ if number_reference_sorted:
153
+ for number in number_reference_sorted:
154
+ # Convert number to an integer so it can be used as an index
155
+ number = int(number)
156
+
157
+ # Make sure sources is not empty and has the required elements
158
+ if sources and len(sources) > 0:
159
+ node = dict(sources[0])["raw_output"].source_nodes
160
+
161
+ # Make sure number is a valid index
162
+ if 0 <= number - 1 < len(node):
163
+
164
+ content = clean_text(node[number - 1].node.get_text())
165
+ contents.append(content)
166
+
167
+ metadata = dict(node[number - 1].node.metadata)
168
+ metadata_collection.append(metadata)
169
+
170
+ score = node[number - 1].score
171
+ scores.append(score)
172
+ else:
173
+ print(f"Invalid reference number: {number}")
174
+ else:
175
+ print("No sources available")
176
+ else:
177
+ print("There are no references")
178
+
179
+ response = update_response(str(response))
180
+ contents = renumber_sources(contents)
181
+
182
+ # Check the lengths of content and metadata
183
+ num_content = len(contents)
184
+ num_metadata = len(metadata_collection)
185
+
186
+ # Add content to metadata
187
+ for i in range(min(num_content, num_metadata)):
188
+ metadata_collection[i]["content"] = re.sub(r"source \d+\:", "", contents[i])
189
+
190
+ message = ChatMessage(
191
+ role=MessageRole.ASSISTANT, content=response, metadata=metadata_collection
192
+ )
193
+
194
+ chatstore.delete_last_message(session_id)
195
+ chatstore.add_message(session_id, message)
196
+ chatstore.clean_message(session_id)
197
+
198
+ return str(response), metadata_collection, scores
199
+ except Exception as e:
200
+ # Log the error and raise HTTPException for FastAPI
201
+ logging.error(f"An error occurred in generate text: {e}")
202
+ raise HTTPException(
203
+ status_code=500,
204
+ detail="An internal server error occurred in generate text.",
205
+ )
206
+
207
+
208
+ async def generate_streaming_completion(user_request, chat_engine, session_id):
209
+ try:
210
+ engine = Engine()
211
+ index_manager = IndexManager()
212
+
213
+ # Load existing indexes
214
+ index = index_manager.load_existing_indexes()
215
+
216
+ # Retrieve the chat engine with the loaded index
217
+ chat_engine = engine.get_chat_engine(index, session_id)
218
+ # Generate completion response
219
+ response = chat_engine.stream_chat(user_request)
220
+
221
+ completed_response = ""
222
+
223
+ for gen in response.response_gen:
224
+ completed_response += gen # Concatenate the new string
225
+ yield BotResponseStreaming(
226
+ content=gen, completed_content=completed_response
227
+ )
228
+
229
+ nodes = response.source_nodes
230
+ for node in nodes:
231
+ reference = str(clean_text(node.node.get_text()))
232
+ metadata = dict(node.node.metadata)
233
+ score = float(node.score)
234
+ yield BotResponseStreaming(
235
+ completed_content=completed_response,
236
+ reference=reference,
237
+ metadata=metadata,
238
+ score=score,
239
+ )
240
+ except Exception as e:
241
+ yield {"error": str(e)}
242
+
243
+ except Exception as e:
244
+ # Log the error and raise HTTPException for FastAPI
245
+ logging.error(f"An error occurred in generate text: {e}")
246
+ raise HTTPException(
247
+ status_code=500,
248
+ detail="An internal server error occurred in generate text.",
249
+ )
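generate_streaming_completion is an async generator that first yields partial text in content and then one chunk per source node carrying reference, metadata, and score. As currently written it rebuilds the chat engine internally, so the chat_engine argument is effectively unused; the consumer sketch below therefore passes a placeholder for it (an assumption based on the function body above):

# sketch; not part of this commit
import asyncio
from api.function import generate_streaming_completion

async def stream_to_stdout(session_id: str, prompt: str) -> None:
    async for chunk in generate_streaming_completion(prompt, None, session_id):
        # error cases are yielded as plain dicts, so guard with getattr
        text = getattr(chunk, "content", None)
        if text:
            print(text, end="", flush=True)

# asyncio.run(stream_to_stdout("existing-session-id", "What does the book say about X?"))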
api/router/__init__.py ADDED
File without changes
api/router/bot.py ADDED
@@ -0,0 +1,299 @@
1
+ from fastapi import APIRouter, HTTPException, Depends
2
+ from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
3
+ from pydantic import BaseModel
4
+
5
+ from core.chat.chatstore import ChatStore
6
+
7
+ from db.database import get_db
8
+ from db.models import Bot_Meta, Bot, Metadata
9
+ from db.models import Session as SessionModel
10
+
11
+ from sqlalchemy.orm import selectinload
12
+ from sqlalchemy import select
13
+
14
+ from api.function import (
15
+ generate_streaming_completion,
16
+ generate_completion_non_streaming,
17
+ )
18
+ from api.router.user import user_dependency
19
+ from sse_starlette.sse import EventSourceResponse
20
+ from utils.utils import generate_uuid
21
+ from typing import Annotated, List, Optional
22
+ from sqlalchemy.orm import Session
23
+ from sqlalchemy.exc import SQLAlchemyError
24
+ from sqlalchemy.exc import NoResultFound
25
+
26
+ from langfuse.llama_index import LlamaIndexCallbackHandler
27
+ from sqlalchemy import and_
28
+
29
+
30
+ router = APIRouter(tags=["Bot"])
31
+
32
+ db_dependency = Annotated[Session, Depends(get_db)]
33
+
34
+
35
+ def get_chat_store():
36
+ return ChatStore()
37
+
38
+
39
+ @router.post("/bot_general/new")
40
+ async def create_session_general():
41
+ session_id = generate_uuid()
42
+ return {"session_id": session_id}
43
+
44
+
45
+ @router.post("/bot")
46
+ async def create_bot(
47
+ user: user_dependency,
48
+ db: db_dependency,
49
+ bot_request: BotCreateRequest,
50
+ ):
51
+
52
+ if user is None:
53
+ raise HTTPException(status_code=401, detail="Authentication Failed")
54
+
55
+ # Create a new bot entry
56
+ try:
57
+ # Create a new bot entry
58
+ new_bot = Bot(
59
+ user_id=user.get("id"), bot_name=bot_request.bot_name
60
+ ) # Assuming user has an 'id' attribute
61
+
62
+ # Add the new bot to the database
63
+ db.add(new_bot)
64
+ db.commit() # Commit the transaction
65
+ db.refresh(new_bot) # Optional: Refresh the instance with the database state
66
+
67
+ return {"status": "success", "bot_id": new_bot.id}
68
+
69
+ except SQLAlchemyError as e:
70
+ db.rollback() # Roll back the transaction in case of an error
71
+ raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")
72
+ except Exception as e:
73
+ raise HTTPException(
74
+ status_code=500, detail=f"An unexpected error occurred: {str(e)}"
75
+ )
76
+
77
+ class BotMetaCreate(BaseModel):
78
+ metadata_id: List[int] # List of integers for metadata_id
79
+
80
+ @router.post("/meta/{bot_id}")
81
+ async def create_bot_specific(
82
+ user: user_dependency,
83
+ db: db_dependency,
84
+ bot_id: int,
85
+ metadata_id: List[Optional[int]], # Use the Pydantic model
86
+ ):
87
+ if user is None:
88
+ raise HTTPException(status_code=401, detail="Authentication Failed")
89
+
90
+ try:
91
+ # metadata_id = bot_meta_data.metadata_id
92
+ print("metadata id = ", metadata_id)
93
+ # Create BotMeta instances for each metadata_id
94
+ bot_meta_entries = [
95
+ Bot_Meta(bot_id=bot_id, metadata_id=mid) for mid in metadata_id
96
+ ]
97
+
98
+ print(bot_meta_entries)
99
+
100
+ # Insert all entries into the database
101
+ db.add_all(bot_meta_entries)
102
+ db.commit() # Commit the transaction
103
+
104
+ except SQLAlchemyError as e:
105
+ raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")
106
+ except Exception as e:
107
+ raise HTTPException(
108
+ status_code=500, detail=f"An unexpected error occurred: {str(e)}"
109
+ )
110
+
111
+ return {"status": "success", "bot_meta": [entry.id for entry in bot_meta_entries]}
112
+
113
+
114
+ @router.post("/session/{bot_id}/new")
115
+ async def create_new_session(
116
+ user: user_dependency, db: db_dependency, bot_id: int
117
+ ):
118
+ # Check if user is authenticated
119
+ if user is None:
120
+ raise HTTPException(status_code=401, detail="Authentication Failed")
121
+
122
+ print(user.get('id'))
123
+ user_id = user.get('id')
124
+ # Ensure the bot belongs to the user
125
+ bot_query = select(Bot).where(Bot.id == bot_id, Bot.user_id == user_id)
126
+
127
+ try:
128
+ bot = db.execute(bot_query).scalar_one()
129
+
130
+ except NoResultFound:
131
+ raise HTTPException(
132
+ status_code=404, detail="Bot not found or unauthorized access."
133
+ )
134
+
135
+ # Generate a new session ID (UUID)
136
+ try :
137
+ session_id = generate_uuid()
138
+
139
+ # Create the new session
140
+ new_session = SessionModel(
141
+ id=session_id,
142
+ user_id=user.get('id'),
143
+ bot_id=bot_id,
144
+ )
145
+
146
+ db.add(new_session)
147
+ db.commit() # Commit the new session to the database
148
+
149
+ return {
150
+ "session_id": session_id,
151
+ }
152
+
153
+ except Exception as e:
154
+ raise HTTPException(
155
+ status_code=500, detail=f"An unexpected error occurred while retrieving the session id: {str(e)}"
156
+ )
157
+
158
+ @router.get("/bot/{session_id}")
159
+ async def get_session_id(
160
+ user: user_dependency,
161
+ session_id: str,
162
+ chat_store: ChatStore = Depends(get_chat_store),
163
+ ):
164
+ if user is None:
165
+ raise HTTPException(status_code=401, detail="Authentication Failed")
166
+
167
+ chat_history = chat_store.get_messages(session_id)
168
+
169
+ if not chat_history:
170
+ raise HTTPException(status_code=404, detail="Session not found or empty.")
171
+
172
+ return chat_history
173
+
174
+
175
+ @router.get("/bot/all/{bot_id}")
176
+ async def get_all_session_ids(user: user_dependency, db: db_dependency, bot_id: int):
177
+ if user is None:
178
+ raise HTTPException(status_code=401, detail="Authentication Failed")
179
+
180
+ try:
181
+ # Query the session IDs based on the user ID
182
+ print(user.get('id'))
183
+ print(bot_id)
184
+ query = (
185
+ select(SessionModel.id)
186
+ .where(SessionModel.user_id == user.get('id'), SessionModel.bot_id == bot_id)
187
+ )
188
+
189
+ result = db.execute(query)
190
+ session_ids = result.scalars().all()
191
+
192
+ # Convert list of tuples to a simple list
193
+ print(session_ids)
194
+
195
+ return {"session_ids" : session_ids}
196
+
197
+ except Exception as e:
198
+ # Log the error and raise HTTPException for FastAPI
199
+ print(f"An error occurred while fetching session IDs: {e}")
200
+ raise HTTPException(status_code=400, detail="Error retrieving session IDs")
201
+
202
+
203
+ @router.post("/bot/{session_id}")
204
+ async def bot_generator_general(
205
+ session_id: str, user_prompt_request: UserPromptRequest
206
+ ):
207
+
208
+ langfuse_callback_handler = LlamaIndexCallbackHandler()
209
+ langfuse_callback_handler.set_trace_params(user_id="guest", session_id=session_id)
210
+
211
+ if user_prompt_request.streaming:
212
+ return EventSourceResponse(
213
+ generate_streaming_completion(
214
+ session_id,
215
+ user_prompt_request.prompt,
216
+ )
217
+ )
218
+ else:
219
+ response, metadata, scores = generate_completion_non_streaming(
220
+ session_id,
221
+ user_prompt_request.prompt,
222
+ )
223
+
224
+ return BotResponse(
225
+ content=response,
226
+ metadata=metadata,
227
+ scores=scores,
228
+ )
229
+
230
+
231
+ @router.post("/bot/{bot_id}/{session_id}")
232
+ async def bot_generator_specific(
233
+ user: user_dependency,
234
+ db: db_dependency,
235
+ bot_id: int,
236
+ session_id: str,
237
+ user_prompt_request: UserPromptRequest,
238
+ ):
239
+ if user is None:
240
+ raise HTTPException(status_code=401, detail="Authentication Failed")
241
+
242
+ langfuse_callback_handler = LlamaIndexCallbackHandler()
243
+ langfuse_callback_handler.set_trace_params(
244
+ user_id=user.get('username'), session_id=session_id
245
+ )
246
+
247
+ # Query to retrieve the titles
248
+ try:
249
+ query = (
250
+ select(Metadata.title)
251
+ .join(Bot_Meta, Metadata.id == Bot_Meta.metadata_id)
252
+ .join(SessionModel, Bot_Meta.bot_id == bot_id)
253
+ .where(SessionModel.user_id == user.get('id'), SessionModel.id == session_id)
254
+ )
255
+
256
+ result = db.execute(query)
257
+ titles = result.scalars().all()
258
+ print(titles)
259
+
260
+ except SQLAlchemyError as e:
261
+ raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")
262
+ except Exception as e:
263
+ raise HTTPException(
264
+ status_code=500, detail=f"An unexpected error occurred: {str(e)}"
265
+ )
266
+
267
+ if user_prompt_request.streaming:
268
+ return EventSourceResponse(
269
+ generate_streaming_completion(
270
+ session_id,
271
+ user_prompt_request.prompt,
272
+ )
273
+ )
274
+ else:
275
+ response, metadata, scores = generate_completion_non_streaming(
276
+ session_id, user_prompt_request.prompt, titles, type="specific"
277
+ )
278
+
279
+ return BotResponse(
280
+ content=response,
281
+ metadata=metadata,
282
+ scores=scores,
283
+ )
284
+
285
+
286
+ @router.delete("/bot/{session_id}")
287
+ async def delete_bot(session_id: str, chat_store: ChatStore = Depends(get_chat_store)):
288
+ try:
289
+ chat_store.delete_messages(session_id)
290
+ return {"info": f"Delete {session_id} successful"}
291
+ except Exception as e:
292
+ # Log the error and raise HTTPException for FastAPI
293
+ print(f"An error occurred while deleting messages: {e}")
294
+ raise HTTPException(status_code=400, detail="An error occurred while deleting messages")
295
+
296
+
297
+ # @router.get("/bot/{bot_id}")
298
+ # async def get_favourite_data(user: user_dependency):
299
+ # pass
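The general bot flow is: POST /bot_general/new to obtain a session id, then POST /bot/{session_id} with a prompt. A client-side sketch using httpx; the base URL is illustrative (port 7860 matches the Dockerfile CMD) and the request fields mirror how UserPromptRequest is read in the handlers above:

# sketch; not part of this commit
import httpx

BASE_URL = "http://localhost:7860"

def ask_general_bot(prompt: str) -> dict:
    with httpx.Client(base_url=BASE_URL, timeout=60) as client:
        session_id = client.post("/bot_general/new").json()["session_id"]
        payload = {"prompt": prompt, "streaming": False}
        return client.post(f"/bot/{session_id}", json=payload).json()

# print(ask_general_bot("Give a short overview of the indexed references."))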
api/router/health.py ADDED
@@ -0,0 +1,10 @@
1
+ from fastapi import Request
2
+ from fastapi.responses import JSONResponse
3
+ from fastapi.routing import APIRouter
4
+
5
+ router = APIRouter(tags=["Health"])
6
+
7
+
8
+ @router.get("/_health")
9
+ async def health(request: Request):
10
+ return JSONResponse(dict(status="OK"), status_code=200)
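A quick check of the health route with FastAPI's TestClient (sketch; not part of this commit):

from fastapi import FastAPI
from fastapi.testclient import TestClient
from api.router.health import router

app = FastAPI()
app.include_router(router)

client = TestClient(app)
assert client.get("/_health").status_code == 200
assert client.get("/_health").json() == {"status": "OK"}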
api/router/role.py ADDED
@@ -0,0 +1,20 @@
1
+ from fastapi import APIRouter
2
+
3
+
4
+ router = APIRouter(tags=["Roles"])
5
+
6
+ @router.get("/roles")
7
+ async def get_data_roles():
8
+ pass
9
+
10
+
11
+ @router.post("/roles")
12
+ async def add_data_roles():
13
+ pass
14
+ @router.put("/roles/{id}")
15
+ async def update_data_roles():
16
+ pass
17
+
18
+ @router.delete("/roles/{id}")
19
+ async def remove_data_roles():
20
+ pass
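All four role handlers are still stubs. A possible shape for the read endpoint, following the db_dependency pattern used by the other routers and assuming db.models defines a Role model with id, role_name, and description (which the migrations above suggest); this is a sketch, not part of the commit:

from typing import Annotated
from fastapi import Depends
from sqlalchemy import select
from sqlalchemy.orm import Session
from db.database import get_db
from db.models import Role

db_dependency = Annotated[Session, Depends(get_db)]

@router.get("/roles/all")
async def get_all_roles(db: db_dependency):
    roles = db.execute(select(Role)).scalars().all()
    return [{"id": r.id, "role_name": r.role_name, "description": r.description} for r in roles]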
api/router/topic.py ADDED
@@ -0,0 +1,178 @@
1
+ from api.function import data_ingestion, get_data, delete_data, update_data
2
+ from api.auth import get_current_user
3
+ from api.router.user import user_dependency
4
+ from fastapi import Form, APIRouter, File, UploadFile, HTTPException, Depends
5
+
6
+ from db.repository import get_db_conn
7
+ from db.get_data import GetDatabase
8
+ from db.models import Category, Metadata
9
+ from db.database import get_db
10
+
11
+ from langfuse.llama_index import LlamaIndexCallbackHandler
12
+
13
+ from config import MYSQL_CONFIG
14
+ from script.vector_db import IndexManager
15
+ from service.dto import MetadataRequest
16
+ from sqlalchemy.orm import Session
17
+ from sqlalchemy.future import select
18
+ from sqlalchemy.exc import SQLAlchemyError
19
+
20
+ from typing import Annotated
21
+
22
+ router = APIRouter(tags=["Topics"])
23
+
24
+ db_conn = get_db_conn(MYSQL_CONFIG)
25
+ get_database = GetDatabase(db_conn)
26
+ index_manager = IndexManager()
27
+ db_dependency = Annotated[Session, Depends(get_db)]
28
+
29
+
30
+ @router.post("/topic")
31
+ async def upload_file(
32
+ user: user_dependency,
33
+ db: db_dependency,
34
+ title: str = Form(...),
35
+ author: str = Form(...),
36
+ category_id: int = Form(...),
37
+ year: int = Form(...),
38
+ publisher: str = Form(...),
39
+ file: UploadFile = File(...),
40
+ ):
41
+ print(user.get('role_id'))
42
+ # if user is None or user.get('role_id') != 1:
43
+ if user is None:
44
+ raise HTTPException(status_code=401, detail='Authentication Failed')
45
+
46
+ try:
47
+ # Query the category based on category_id
48
+ category_query = select(Category.category).where(Category.id == category_id)
49
+ result = db.execute(category_query)
50
+ category = result.scalar_one_or_none()
51
+
52
+ # Check if the category exists
53
+ if category is None:
54
+ raise HTTPException(status_code=404, detail="Category not found")
55
+
56
+ # Construct the reference dictionary
57
+ reference = {
58
+ "title": title,
59
+ "author": author,
60
+ "category": category,
61
+ "year": year,
62
+ "publisher": publisher,
63
+ }
64
+
65
+ except SQLAlchemyError as db_exc:
66
+ # Handle any database-related errors (e.g., connection issues, query issues)
67
+ print(f"Database error: {db_exc}")
68
+ raise HTTPException(status_code=500, detail="Database error occurred")
69
+
70
+ except Exception as e:
71
+ # Catch any other general exceptions
72
+ print(f"Error: {e}")
73
+ raise HTTPException(status_code=500, detail="An error occurred while processing your request") from e
74
+
75
+ try:
76
+ # Assuming you have a Langfuse callback handler
77
+ langfuse_callback_handler = LlamaIndexCallbackHandler()
78
+ langfuse_callback_handler.set_trace_params(
79
+ user_id="admin_book_uploaded",
80
+ )
81
+
82
+ # Process the file and handle data ingestion
83
+ response = await data_ingestion(db_conn, category_id, reference, file)
84
+
85
+ except Exception as e:
86
+ # Handle any errors related to file processing or data ingestion
87
+ print(f"File processing error: {e}")
88
+ raise HTTPException(status_code=500, detail="File processing error")
89
+
90
+ # Return a successful response with the uploaded filename and response from data ingestion
91
+ return {"filename": file.filename, "response": response}
92
+
93
+
94
+ @router.get("/topic")
95
+ async def get_metadata(user: user_dependency):
96
+ if user is None or user.get('role_id') != 1:
97
+ raise HTTPException(status_code=401, detail='Authentication Failed')
98
+ results = await get_data(db_conn)
99
+ return results
100
+
101
+
102
+ @router.put("/topic/{id}")
103
+ async def update_metadata(user: user_dependency, db: db_dependency, id: int, reference: MetadataRequest):
104
+ if user is None or user.get('role_id') != 1:
105
+ raise HTTPException(status_code=401, detail='Authentication Failed')
106
+
107
+ try:
108
+ old_metadata = await get_database.get_data_by_id(id)
109
+
110
+ # Fetch old and new categories
111
+ old_category = (
112
+ db.execute(
113
+ select(Category.category)
114
+ .join(Metadata)
115
+ .where(Metadata.id == id)
116
+ ).scalar_one_or_none()
117
+ )
118
+
119
+ print("old category", old_category)
120
+
121
+ new_category = (
122
+ db.execute(
123
+ select(Category.category)
124
+ .where(Category.id == reference.category_id)
125
+ ).scalar_one_or_none()
126
+ )
127
+
128
+ print("new category", new_category)
129
+
130
+ if old_category is None or new_category is None:
131
+ raise HTTPException(status_code=404, detail="Category not found.")
132
+
133
+ # Prepare the references
134
+ old_reference = {
135
+ "title": old_metadata["title"],
136
+ "author": old_metadata["author"],
137
+ "category": old_category,
138
+ "year": old_metadata["year"],
139
+ "publisher": old_metadata["publisher"],
140
+ }
141
+ print(old_reference)
142
+
143
+ new_reference = {
144
+ "title": reference.title,
145
+ "author": reference.author,
146
+ "category": new_category,
147
+ "year": reference.year,
148
+ "publisher": reference.publisher,
149
+ }
150
+ print(new_reference)
151
+ print("reference : ", reference)
152
+
153
+ # index_manager.update_vector_database(old_reference, new_reference)
154
+
155
+ return await update_data(id, reference, db_conn)
156
+ except Exception as e:
157
+ raise HTTPException(
158
+ status_code=500, detail="An error occurred while updating metadata"
159
+ )
160
+
161
+
162
+ @router.delete("/topic/{id}")
163
+ async def delete_metadata(user: user_dependency, id: int):
164
+ if user is None or user.get('role_id') != 1:
165
+ raise HTTPException(status_code=401, detail='Authentication Failed')
166
+
167
+ try:
168
+ old_reference = await get_database.get_data_by_id(id)
169
+ index_manager.delete_vector_database(old_reference)
170
+
171
+ return await delete_data(id, db_conn)
172
+ # return {"Status":"success"}
173
+
174
+ except Exception as e:
175
+ print(e)
176
+ raise HTTPException(
177
+ status_code=500, detail="An error occurred while delete metadata"
178
+ )
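
For reference, a hedged sketch of how the upload endpoint might be called from a client. The host, bearer token, category_id and file name are placeholders; the request must be multipart form data because the route uses Form(...) and File(...):

    import httpx

    headers = {"Authorization": "Bearer <access_token>"}  # token from /login
    form = {
        "title": "Basic Orthopedics",
        "author": "Jane Doe",
        "category_id": "1",      # must exist in the category table
        "year": "2021",
        "publisher": "Example Press",
    }
    with open("book.pdf", "rb") as f:
        resp = httpx.post(
            "http://localhost:8000/topic",
            headers=headers,
            data=form,
            files={"file": ("book.pdf", f, "application/pdf")},
            timeout=120.0,
        )
    print(resp.status_code, resp.json())
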
api/router/trial.py ADDED
@@ -0,0 +1,20 @@
1
+ from fastapi import APIRouter
2
+
3
+
4
+ router = APIRouter(tags=["Trial"])
5
+
6
+ @router.get("/roles")
7
+ async def get_trial_data():
8
+ pass
9
+
10
+
11
+ @router.post("/roles")
12
+ async def add_trial_data():
13
+ pass
14
+ @router.put("/roles/{id}")
15
+ async def update_trial_data():
16
+ pass
17
+
18
+ @router.delete("/roles/{id}")
19
+ async def remove_trial_data():
20
+ pass
api/router/user.py ADDED
@@ -0,0 +1,115 @@
1
+ from fastapi import APIRouter, Depends, HTTPException, status
2
+ from fastapi.security import OAuth2PasswordRequestForm
3
+ from db.models import User
4
+ from db.database import get_db
5
+ from api.auth import get_current_user, create_access_token
6
+ from service.dto import CreateUserRequest, UserVerification, Token
7
+ from typing import Annotated
8
+ from passlib.context import CryptContext
9
+ from sqlalchemy.orm import Session
10
+ from datetime import timedelta
11
+
12
+
13
+ router = APIRouter(tags=["User"])
14
+
15
+ bcrypt_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
16
+
17
+ db_dependency = Annotated[Session, Depends(get_db)]
18
+ user_dependency = Annotated[dict, Depends(get_current_user)]
19
+
20
+ ACCESS_TOKEN_EXPIRE_MINUTES = 43200
21
+
22
+
23
+ @router.post("/login", response_model=Token)
24
+ async def login_for_access_token(
25
+ login_data: Annotated[OAuth2PasswordRequestForm, Depends()], db: Session = Depends(get_db)
26
+ ):
27
+ user = db.query(User).filter(User.username == login_data.username).first()
28
+
29
+ if not user or not bcrypt_context.verify(login_data.password, user.password_hash):
30
+ raise HTTPException(
31
+ status_code=status.HTTP_401_UNAUTHORIZED,
32
+ detail="Incorrect username or password",
33
+ headers={"WWW-Authenticate": "Bearer"},
34
+ )
35
+
36
+ try:
37
+ access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
38
+ access_token = create_access_token(
39
+ user.username, user.id, user.role_id, access_token_expires, user.email
40
+ )
41
+
42
+ return {"access_token": access_token, "token_type": "bearer"}
43
+
44
+ except Exception as e:
45
+ print(e)
46
+ raise HTTPException(
47
+ status_code=500, detail="An error occuring when login"
48
+ )
49
+
50
+
51
+ @router.get("/login", response_model=dict)
52
+ async def get_user(user: user_dependency):
53
+ return {"name": user.get('username'), "id" : user.get('id'), "email": user.get('email'), "role": user.get('role_id')}
54
+
55
+
56
+ @router.post("/register")
57
+ async def register_user(db: db_dependency, create_user_request: CreateUserRequest):
58
+ existing_user = (
59
+ db.query(User).filter(User.email == create_user_request.email).first()
60
+ )
61
+
62
+ if existing_user:
63
+ raise HTTPException(
64
+ status_code=status.HTTP_400_BAD_REQUEST,
65
+ detail="Email is already registered",
66
+ )
67
+
68
+ try:
69
+ password_hash = bcrypt_context.hash(create_user_request.password)
70
+
71
+ create_user_model = User(
72
+ name=create_user_request.name,
73
+ username=create_user_request.username,
74
+ email=create_user_request.email,
75
+ role_id=create_user_request.role_id,
76
+ password_hash=password_hash,
77
+ )
78
+
79
+ db.add(create_user_model)
80
+ db.commit()
81
+ db.refresh(create_user_model)
82
+
83
+ return {"message": "User created successfully", "user_id": create_user_model.id}
84
+ except Exception as e:
85
+ print(e)
86
+ raise HTTPException(
87
+ status_code=500, detail="An error occuring when register user"
88
+ )
89
+
90
+
91
+
92
+ @router.post("/forgot_password")
93
+ async def forget_password():
94
+ pass
95
+
96
+
97
+ @router.post("/change_password")
98
+ async def change_password(
99
+ user: user_dependency, db: db_dependency, user_verification: UserVerification
100
+ ):
101
+ if user is None:
102
+ raise HTTPException(status_code=401, detail="Authentication Failed")
103
+ user_model = db.query(User).filter(User.id == user.get("id")).first()
104
+
105
+ if not bcrypt_context.verify(
106
+ user_verification.password, user_model.password_hash
107
+ ):
108
+ raise HTTPException(status_code=401, detail="Error on password change")
109
+
110
+ user_model.password_hash = bcrypt_context.hash(user_verification.new_password)
111
+ db.add(user_model)
112
+ db.commit()
113
+ db.refresh(user_model)
114
+
115
+ return {"message": "User's password successfully changed", "user_id": user_model.id}
app.py ADDED
@@ -0,0 +1,40 @@
1
+ from fastapi.applications import FastAPI
2
+ from api.router import health, topic, user, bot, trial, role, reader
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from api.events import register_events
5
+ from utils.utils import pipe
6
+
7
+ def create_instance() -> FastAPI:
8
+ return FastAPI()
9
+
10
+ def add_middleware(app: FastAPI) -> FastAPI:
11
+ app.add_middleware(
12
+ CORSMiddleware,
13
+ allow_origins=["*"],
14
+ allow_credentials=True,
15
+ allow_methods=["*"],
16
+ allow_headers=["*"],
17
+ )
18
+ return app
19
+
20
+ def init_database(app: FastAPI) -> FastAPI:
21
+ return app
22
+
23
+ def register_routers(app: FastAPI) -> FastAPI:
24
+ app.include_router(user.router)
25
+ app.include_router(topic.router)
26
+ app.include_router(bot.router)
27
+ app.include_router(trial.router)
28
+ app.include_router(role.router)
29
+ app.include_router(health.router)
30
+
31
+ return app
32
+
33
+
34
+ def init_app() -> FastAPI:
35
+ app: FastAPI = pipe(
36
+ create_instance(), add_middleware, init_database, register_events, register_routers
37
+ )
38
+ return app
39
+
40
+ app = init_app()
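
utils.utils.pipe is imported above but is not part of this diff; the pipeline only works if it threads each function over the previous result, left to right. A minimal sketch of such a helper (assumed behaviour, not the committed implementation), plus how the app is typically served (uvicorn is pinned in requirements.txt):

    from functools import reduce

    def pipe(value, *functions):
        """Thread `value` through `functions` left to right (assumed behaviour of utils.utils.pipe)."""
        return reduce(lambda acc, fn: fn(acc), functions, value)

    # Typical way to serve the application:
    #   uvicorn app:app --host 0.0.0.0 --port 8000
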
config.py ADDED
@@ -0,0 +1,38 @@
1
+ from pydantic_settings import BaseSettings
2
+ import os
3
+
4
+
5
+ class MysqlConfig(BaseSettings):
6
+ DB_HOST: str = ""
7
+ DB_PORT: str = "10707" # Default MySQL port
8
+ DB_URI: str = ""
9
+ DB_USERNAME: str = ""
10
+ DB_PASSWORD: str = ""
11
+ DB_NAME: str = ""
12
+ DB_URI_SQL_ALCHEMY: str = ""
13
+
14
+ class Config:
15
+ env_file = ".env"
16
+ env_file_encoding = "utf-8"
17
+ extra = "allow" # Allow extra fields
18
+
19
+
20
+ class PineconeConfig(BaseSettings):
21
+ PINECONE_API_KEY: str = ""
22
+
23
+ class Config:
24
+ env_file = ".env"
25
+ env_file_encoding = "utf-8"
26
+ extra = "allow" # Allow extra fields
27
+
28
+ class GPTBotConfig(BaseSettings):
29
+ temperature : float = 0.3
30
+ model : str = "gpt-4o-mini"
31
+ max_tokens : int = 512
32
+ streaming : bool = False
33
+ api_key : str = os.environ.get("OPENAI_API_KEY")
34
+
35
+ # Load configuration
36
+ MYSQL_CONFIG = MysqlConfig()
37
+ PINECONE_CONFIG = PineconeConfig()
38
+ GPTBOT_CONFIG = GPTBotConfig()
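
These settings classes read a local .env file; a sketch of the expected keys follows (all values are placeholders, and the exact connection-string schemes depend on the drivers actually used):

    # .env (placeholders):
    #   DB_HOST=db.example.com
    #   DB_PORT=10707
    #   DB_URI=mysql+aiomysql://user:password@db.example.com:10707/dbname
    #   DB_URI_SQL_ALCHEMY=mysql+pymysql://user:password@db.example.com:10707/dbname
    #   DB_USERNAME=user
    #   DB_PASSWORD=password
    #   DB_NAME=dbname
    #   PINECONE_API_KEY=...
    #   OPENAI_API_KEY=...

    from config import MYSQL_CONFIG, GPTBOT_CONFIG

    print(MYSQL_CONFIG.DB_NAME, GPTBOT_CONFIG.model)
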
core/__init__.py ADDED
File without changes
core/book_enabler/__init__.py ADDED
File without changes
core/chat/__init__.py ADDED
File without changes
core/chat/chatstore.py ADDED
@@ -0,0 +1,101 @@
1
+ import redis
2
+ import os
3
+ import json
4
+ from fastapi import HTTPException
5
+ from uuid import uuid4
6
+ from typing import Optional, List
7
+ from llama_index.storage.chat_store.redis import RedisChatStore
8
+ from llama_index.core.memory import ChatMemoryBuffer
9
+ from service.dto import ChatMessage
10
+
11
+
12
+ class ChatStore:
13
+ def __init__(self):
14
+ self.redis_client = redis.Redis(
15
+ host="redis-10365.c244.us-east-1-2.ec2.redns.redis-cloud.com",
16
+ port=10365,
17
+ password=os.environ.get("REDIS_PASSWORD"),
18
+ )
19
+
20
+ def generate_uuid(self, use_hex=False):
21
+ if use_hex:
22
+ return str(uuid4().hex)
23
+ else:
24
+ return str(uuid4())
25
+
26
+ def initialize_memory_bot(self, type, session_id):
27
+ if type == "general":
28
+ chat_store = RedisChatStore(
29
+ redis_client=self.redis_client,
30
+ ttl=3600 # Time-to-live set for 1 hour
31
+ )
32
+ else:
33
+ chat_store = RedisChatStore(
34
+ redis_client=self.redis_client # Regular chat store without TTL
35
+ )
36
+
37
+ # chat_store = SimpleChatStore()
38
+ # chat_store = RedisChatStore(
39
+ #     redis_client=self.redis_client
40
+ # )  # Need to be configured
41
+
42
+ memory = ChatMemoryBuffer.from_defaults(
43
+ token_limit=3000, chat_store=chat_store, chat_store_key=session_id
44
+ )
45
+
46
+ return memory
47
+
48
+ def get_messages(self, session_id: str) -> List[dict]:
49
+ """Get messages for a session_id."""
50
+ items = self.redis_client.lrange(session_id, 0, -1)
51
+ if len(items) == 0:
52
+ return []
53
+
54
+ # Decode and parse each item into a dictionary
55
+ return [json.loads(m.decode("utf-8")) for m in items]
56
+
57
+ def delete_last_message(self, session_id: str) -> Optional[ChatMessage]:
58
+ """Delete last message for a session_id."""
59
+ return self.redis_client.rpop(session_id)
60
+
61
+ def delete_messages(self, key: str) -> Optional[List[ChatMessage]]:
62
+ """Delete messages for a key."""
63
+ self.redis_client.delete(key)
64
+ return None
65
+
66
+ def clean_message(self, session_id: str) -> Optional[ChatMessage]:
67
+ """Delete specific message for a session_id."""
68
+ current_list = self.redis_client.lrange(session_id, 0, -1)
69
+
70
+ indices_to_delete = []
71
+ for index, item in enumerate(current_list):
72
+ data = json.loads(item) # Parse JSON string to dict
73
+
74
+ # Logic to determine if item should be removed
75
+ if (data.get("role") == "assistant" and data.get("content") is None) or (data.get("role") == "tool"):
76
+ indices_to_delete.append(index)
77
+
78
+ # Remove elements by their indices in reverse order
79
+ for index in reversed(indices_to_delete):
80
+ self.redis_client.lrem(session_id, 1, current_list[index]) # Remove the element from the list in Redis
81
+
82
+ def get_keys(self) -> List[str]:
83
+ """Get all keys."""
84
+ try :
85
+ print(self.redis_client.keys("*"))
86
+ return [key.decode("utf-8") for key in self.redis_client.keys("*")]
87
+
88
+ except Exception as e:
89
+ # Log the error and raise HTTPException for FastAPI
90
+ print(f"An error occurred in update data.: {e}")
91
+ raise HTTPException(
92
+ status_code=400, detail="the error when get keys"
93
+ )
94
+
95
+ def add_message(self, session_id: str, message: ChatMessage) -> None:
96
+ """Add a message for a session_id."""
97
+ item = json.dumps(self._message_to_dict(message))
98
+ self.redis_client.rpush(session_id, item)
99
+
100
+ def _message_to_dict(self, message: ChatMessage) -> dict:
101
+ return message.model_dump()
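
A short usage sketch for ChatStore (assumes REDIS_PASSWORD is set, and that service.dto.ChatMessage, which is not shown in this commit, is a pydantic model with role and content fields):

    from core.chat.chatstore import ChatStore
    from service.dto import ChatMessage  # assumed: pydantic model with `role` and `content`

    store = ChatStore()
    session_id = "demo-session"  # hypothetical key

    store.add_message(session_id, ChatMessage(role="user", content="Hello"))
    print(store.get_messages(session_id))   # [{'role': 'user', 'content': 'Hello'}]
    print(store.get_keys())                 # all session keys currently in Redis
    store.delete_messages(session_id)
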
core/chat/engine.py ADDED
@@ -0,0 +1,107 @@
1
+ from typing import List
2
+ from llama_index.core.vector_stores import (
3
+ MetadataFilter,
4
+ MetadataFilters,
5
+ )
6
+
7
+ from llama_index.core.tools import QueryEngineTool, ToolMetadata
8
+ from llama_index.agent.openai import OpenAIAgent
9
+ from llama_index.llms.openai import OpenAI
10
+ from llama_index.core.query_engine import CitationQueryEngine
11
+ from llama_index.core import Settings
12
+ from core.chat.chatstore import ChatStore
13
+
14
+ from config import GPTBOT_CONFIG
15
+ from core.prompt import SYSTEM_BOT_TEMPLATE, ADDITIONAL_INFORMATIONS
16
+ from core.parser import join_list
17
+
18
+
19
+ class Engine:
20
+ def __init__(self):
21
+ self.llm = OpenAI(
22
+ temperature=GPTBOT_CONFIG.temperature,
23
+ model=GPTBOT_CONFIG.model,
24
+ max_tokens=GPTBOT_CONFIG.max_tokens,
25
+ api_key=GPTBOT_CONFIG.api_key,
26
+ )
27
+
28
+ self.chat_store = ChatStore()
29
+
30
+ Settings.llm = self.llm
31
+
32
+ # def _build_description_bot(self, titles):
33
+ # try:
34
+ # title_description = join_list(titles)
35
+
36
+ # prompt = f"""Generate a concise description (up to 1024 characters) for a bot based on the provided `{title_description}`.
37
+ # The description should accurately reflect the specific medical fields or conditions covered by the titles, following this format:
38
+
39
+ # - If the titles are related to orthopedics, the output should be: *A bot containing information on orthopedics.*
40
+ # - If the titles are related to cancer and asthma, the output should be: *A bot containing information on cancer and asthma.*
41
+ # - If the titles are related to internal medicine, the output should be: *A bot containing information on internal medicine.*
42
+
43
+ # Apply this format to match the medical specialties or diseases referenced in `{title_description}`,
44
+ # ensuring the description highlights the relevant topics.
45
+ # """
46
+
47
+ # description_result = str(self.llm.complete(prompt))
48
+
49
+ # return description_result
50
+
51
+ # except Exception as e:
52
+ # return f"Error generating description: {str(e)}"
53
+
54
+ def get_citation_engine(self, titles: List, index):
55
+ filters = [
56
+ MetadataFilter(
57
+ key="title",
58
+ value=title,
59
+ operator="==",
60
+ )
61
+ for title in titles
62
+ ]
63
+
64
+ filters = MetadataFilters(filters=filters, condition="or")
65
+
66
+ # Create the QueryEngineTool with the index and filters
67
+ kwargs = {"similarity_top_k": 5, "filters": filters}
68
+
69
+ retriever = index.as_retriever(**kwargs)
70
+
71
+ # citation_engine = CitationQueryEngine(retriever=retriever)
72
+
73
+ return CitationQueryEngine.from_args(index, retriever=retriever)
74
+
75
+ def get_chat_engine(self, session_id, index, titles=None, type="general"):
76
+ # Create the QueryEngineTool based on the type
77
+ if type == "general":
78
+ # query_engine = index.as_query_engine(similarity_top_k=3)
79
+ citation_engine = CitationQueryEngine.from_args(index, similarity_top_k=5)
80
+ description = "A book containing information about medicine"
81
+ else:
82
+ citation_engine = self.get_citation_engine(titles, index)
83
+ description = "A book containing information about medicine"
84
+
85
+ metadata = ToolMetadata(name="bot-belajar", description=description)
86
+ print(metadata)
87
+
88
+ vector_query_engine = QueryEngineTool(
89
+ query_engine=citation_engine, metadata=metadata
90
+ )
91
+ print(vector_query_engine)
92
+
93
+ # Initialize the OpenAI agent with the tools
94
+
95
+ if type == "general":
96
+ system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information="")
97
+ else:
98
+ additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
99
+ system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information=additional_information)
100
+ chat_engine = OpenAIAgent.from_tools(
101
+ tools=[vector_query_engine],
102
+ llm=self.llm,
103
+ memory=self.chat_store.initialize_memory_bot(type, session_id),
104
+ system_prompt=system_prompt,
105
+ )
106
+
107
+ return chat_engine
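
A hedged sketch of wiring Engine to an index. IndexManager.load_existing_indexes() is the same call used by core/chat/messaging.py below, and OPENAI_API_KEY plus the Pinecone credentials are assumed to be set:

    from core.chat.engine import Engine
    from script.vector_db import IndexManager

    index = IndexManager().load_existing_indexes()   # same call as in core/chat/messaging.py
    engine = Engine()

    chat_engine = engine.get_chat_engine(session_id="demo-session", index=index)
    print(chat_engine.chat("What are the common causes of anemia?"))
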
core/chat/messaging.py ADDED
@@ -0,0 +1,63 @@
1
+ # Experimental
2
+
3
+ from typing import Dict, Any, Optional, List
4
+ import asyncio
5
+ import logging
6
+ from uuid import uuid4
7
+ from anyio import ClosedResourceError
8
+ from anyio.streams.memory import MemoryObjectSendStream
9
+
10
+ from llama_index.core.callbacks.base import BaseCallbackHandler, CallbackManager
11
+ from llama_index.core.callbacks import CBEventType, EventPayload
12
+ from llama_index.core.query_engine.sub_question_query_engine import (
13
+ SubQuestionAnswerPair,
14
+ )
15
+ from llama_index.core.chat_engine.types import StreamingAgentChatResponse
16
+ from pydantic import BaseModel
17
+
18
+ from core.chat import schema
19
+
20
+ from db.db import MessageSubProcessSourceEnum
21
+ from core.chat.schema import SubProcessMetadataKeysEnum, SubProcessMetadataMap
22
+ from core.chat.engine import Engine
23
+ from script.vector_db import IndexManager
24
+ from service.dto import UserPromptRequest
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ class StreamedMessage(BaseModel):
29
+ content: str
30
+
31
+ async def handle_chat_message(
32
+ user_message: str,
33
+ send_chan: MemoryObjectSendStream,
34
+ ) -> None:
35
+ async with send_chan:
36
+ engine = Engine()
37
+
38
+ index_manager = IndexManager()
39
+ index = index_manager.load_existing_indexes()
40
+
41
+ # Retrieve the chat engine with the loaded index
42
+ chat_engine = await engine.get_chat_engine(index)
43
+
44
+ logger.debug("Engine received")
45
+ streaming_chat_response: StreamingAgentChatResponse = (
46
+ await chat_engine.astream_chat(user_message)
47
+ )
48
+ response_str = ""
49
+ async for text in streaming_chat_response.async_response_gen():
50
+ response_str += text
51
+ if send_chan._closed:
52
+ logger.debug(
53
+ "Received streamed token after send channel closed. Ignoring."
54
+ )
55
+ return
56
+ await send_chan.send(StreamedMessage(content=response_str))
57
+
58
+ if response_str.strip() == "":
59
+ await send_chan.send(
60
+ StreamedMessage(
61
+ content="Sorry, I either wasn't able to understand your question or I don't have an answer for it."
62
+ )
63
+ )
core/chat/schema.py ADDED
@@ -0,0 +1,162 @@
1
+ # Experimental
2
+
3
+ from pydantic import BaseModel, Field, field_validator
4
+ from typing import List, Optional, Dict, Union, Any
5
+ from enum import Enum
6
+ from uuid import UUID
7
+ from datetime import datetime
8
+ from llama_index.core.schema import BaseNode, NodeWithScore
9
+ from llama_index.core.callbacks.schema import EventPayload
10
+ from llama_index.core.query_engine.sub_question_query_engine import SubQuestionAnswerPair
11
+ from db.db import (
12
+ MessageRoleEnum,
13
+ MessageStatusEnum,
14
+ MessageSubProcessSourceEnum,
15
+ MessageSubProcessStatusEnum,
16
+ )
17
+
18
+ DB_DOC_ID_KEY = "db_document_id"
19
+
20
+ class Base(BaseModel):
21
+ id: Optional[UUID] = Field(None, description="Unique identifier")
22
+ created_at: Optional[datetime] = Field(None, description="Creation datetime")
23
+ updated_at: Optional[datetime] = Field(None, description="Update datetime")
24
+
25
+ class Config:
26
+ orm_mode = True
27
+
28
+ class BaseMetadataObject(BaseModel):
29
+ class Config:
30
+ orm_mode = True
31
+
32
+ class Citation(BaseMetadataObject):
33
+ document_id: UUID
34
+ text: str
35
+ page_number: int
36
+ score: Optional[float]
37
+
38
+ @field_validator("document_id")
39
+ def validate_document_id(cls, value):
40
+ if value:
41
+ return str(value)
42
+ return value
43
+
44
+ @classmethod
45
+ def from_node(cls, node_w_score: NodeWithScore) -> "Citation":
46
+ node: BaseNode = node_w_score.node
47
+ page_number = int(node.source_node.metadata["page_label"])
48
+ document_id = node.source_node.metadata[""]
49
+ return cls(
50
+ document_id=document_id,
51
+ text=node.get_content(),
52
+ page_number=page_number,
53
+ score=node_w_score.score,
54
+ )
55
+
56
+
57
+ class QuestionAnswerPair(BaseMetadataObject):
58
+ """
59
+ A question-answer pair that is used to store the sub-questions and answers
60
+ """
61
+
62
+ question: str
63
+ answer: Optional[str]
64
+ citations: Optional[List[Citation]] = None
65
+
66
+ @classmethod
67
+ def from_sub_question_answer_pair(
68
+ cls, sub_question_answer_pair: SubQuestionAnswerPair
69
+ ):
70
+ if sub_question_answer_pair.sources is None:
71
+ citations = None
72
+ else:
73
+ citations = [
74
+ Citation.from_node(node_w_score)
75
+ for node_w_score in sub_question_answer_pair.sources
76
+ if node_w_score.node.source_node is not None
77
+ and DB_DOC_ID_KEY in node_w_score.node.source_node.metadata
78
+ ]
79
+ citations = citations or None
80
+ return cls(
81
+ question=sub_question_answer_pair.sub_q.sub_question,
82
+ answer=sub_question_answer_pair.answer,
83
+ citations=citations,
84
+ )
85
+
86
+
87
+ # later will be Union[QuestionAnswerPair, more to add later... ]
88
+ class SubProcessMetadataKeysEnum(str, Enum):
89
+ SUB_QUESTION = EventPayload.SUB_QUESTION.value
90
+
91
+
92
+ # keeping the typing pretty loose here, in case there are changes to the metadata data formats.
93
+ SubProcessMetadataMap = Dict[Union[SubProcessMetadataKeysEnum, str], Any]
94
+
95
+
96
+ class MessageSubProcess(Base):
97
+ message_id: UUID
98
+ source: MessageSubProcessSourceEnum
99
+ status: MessageSubProcessStatusEnum
100
+ metadata_map: Optional[SubProcessMetadataMap]
101
+
102
+
103
+ class Message(Base):
104
+ conversation_id: UUID
105
+ content: str
106
+ role: MessageRoleEnum
107
+ status: MessageStatusEnum
108
+ sub_processes: List[MessageSubProcess]
109
+
110
+
111
+ class UserMessageCreate(BaseModel):
112
+ content: str
113
+
114
+ class DocumentMetadataKeysEnum(str, Enum):
115
+ """
116
+ Enum for the keys of the metadata map for a document
117
+ """
118
+
119
+ SEC_DOCUMENT = "sec_document"
120
+
121
+
122
+ class SecDocumentTypeEnum(str, Enum):
123
+ """
124
+ Enum for the type of sec document
125
+ """
126
+
127
+ TEN_K = "10-K"
128
+ TEN_Q = "10-Q"
129
+
130
+
131
+ class SecDocumentMetadata(BaseModel):
132
+ """
133
+ Metadata for a document that is a sec document
134
+ """
135
+
136
+ company_name: str
137
+ company_ticker: str
138
+ doc_type: SecDocumentTypeEnum
139
+ year: int
140
+ quarter: Optional[int]
141
+ accession_number: Optional[str]
142
+ cik: Optional[str]
143
+ period_of_report_date: Optional[datetime]
144
+ filed_as_of_date: Optional[datetime]
145
+ date_as_of_change: Optional[datetime]
146
+
147
+
148
+ DocumentMetadataMap = Dict[Union[DocumentMetadataKeysEnum, str], Any]
149
+
150
+
151
+ class Document(Base):
152
+ url: str
153
+ metadata_map: Optional[DocumentMetadataMap] = None
154
+
155
+
156
+ class Conversation(Base):
157
+ messages: List[Message]
158
+ documents: List[Document]
159
+
160
+
161
+ class ConversationCreate(BaseModel):
162
+ document_ids: List[UUID]
core/parser.py ADDED
@@ -0,0 +1,102 @@
1
+ import re
2
+
3
+
4
+ def parse_topics_to_dict(text):
5
+ topics = {}
6
+ lines = text.strip().split("\n")
7
+ current_topic = None
8
+
9
+ topic_pattern = re.compile(r"^\d+\.\s+(.*)$")
10
+ sub_topic_pattern = re.compile(r"^\*\s+(.*)$")
11
+
12
+ for line in lines:
13
+ line = line.strip()
14
+ if topic_pattern.match(line):
15
+ current_topic = topic_pattern.match(line).group(1)
16
+ topics[current_topic] = []
17
+ elif sub_topic_pattern.match(line):
18
+ sub_topic = sub_topic_pattern.match(line).group(1)
19
+ if current_topic:
20
+ topics[current_topic].append(sub_topic)
21
+
22
+ print(topics)
23
+ return topics
24
+
25
+
26
+ def remove_all_sources(text):
27
+ # Construct a regular expression pattern to match all sources
28
+ pattern = r"Source \d+:(.*?)(?=Source \d+:|$)"
29
+
30
+ # Use re.DOTALL to make '.' match newlines and re.IGNORECASE for case-insensitive matching
31
+ updated_text = re.sub(pattern, "", text, flags=re.DOTALL)
32
+
33
+ return updated_text.strip()
34
+
35
+
36
+ def clean_text(text):
37
+ # Replace multiple spaces with a single space
38
+ text = re.sub(r"\s{2,}", " ", text)
39
+ # Remove newline characters that are not followed by a number (to keep lists or numbered points)
40
+ text = re.sub(r"\n(?!\s*\d)", " ", text)
41
+ # Remove unnecessary punctuation (optional, adjust as needed)
42
+ text = re.sub(r";(?=\S)", "", text)
43
+ # Optional: Remove extra spaces around certain characters
44
+ text = re.sub(r"\s*([,;])\s*", r"\1 ", text)
45
+ # Normalize whitespace to a single space
46
+ text = re.sub(r"\s+", " ", text).strip()
47
+
48
+ return text
49
+
50
+
51
+ def update_response(text):
52
+ # Find all the references in the text, e.g., [1], [3], [5]
53
+ responses = re.findall(r"\[\d+\]", text)
54
+
55
+ # Extract the numbers from the responses, and remove duplicates
56
+ ref_numbers = sorted(set(int(respon.strip("[]")) for respon in responses))
57
+
58
+ # Create a mapping from old reference numbers to new ones
59
+ ref_mapping = {old: new for new, old in enumerate(ref_numbers, start=1)}
60
+
61
+ # Replace old responses with the updated responses in the text
62
+ for old, new in ref_mapping.items():
63
+ text = re.sub(rf"\[{old}\]", f"[{new}]", text)
64
+
65
+ return text
66
+
67
+
68
+ def renumber_sources(source_list):
69
+ new_sources = []
70
+ for i, source in enumerate(source_list):
71
+ # Extract the content after the colon
72
+ content = source.split(": ", 1)[1]
73
+ # Add the new source number and content
74
+ new_sources.append(f"source {i+1}: {content}")
75
+ return new_sources
76
+
77
+
78
+ def seperate_to_list(text):
79
+ # Step 1: Split the text by line breaks (\n)
80
+ lines = text.split("\n")
81
+
82
+ # Step 2: Remove occurrences of "source (number):"
83
+ cleaned_lines = [re.sub(r"Source \d+\:", "", line) for line in lines]
84
+
85
+ # Step 3: Split all capital sentences
86
+ final_output = []
87
+ for line in cleaned_lines:
88
+ # Split any fully capitalized sentence (surrounding non-uppercase text remains intact)
89
+ split_line = re.split(r"([A-Z\s]+[.!?])", line)
90
+ final_output.extend([part.strip() for part in split_line if part.strip()])
91
+
92
+ return final_output
93
+
94
+ def join_list(items):
95
+ if not items:
96
+ return ""
97
+ elif len(items) == 1:
98
+ return items[0]
99
+ elif len(items) == 2:
100
+ return f"{items[0]} and {items[1]}"
101
+ else:
102
+ return ", ".join(items[:-1]) + " and " + items[-1]
core/prompt.py ADDED
@@ -0,0 +1,126 @@
1
+ SYSTEM_BOT_TEMPLATE = """
2
+ Kamu adalah Medbot yang selalu menggunakan tools kamu untuk menjawab pertanyaan tentang kedokteran. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. {additional_information} Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi dari apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli.
3
+
4
+ **Instruksi**:
5
+
6
+ 1. **Jawaban Berdasarkan Tools**: Jika pengguna bertanya tentang topik kedokteran, gunakanlah tools yang tersedia untuk memberikan jawaban. Pastikan jawabanmu relevan dan sesuai dengan informasi dari tools tersebut.
7
+
8
+ 2. **Referensi dan Kutipan**: Jangan menghapus sumber kutipan dari teks yang diberikan. Contohnya, jika teksnya adalah "Ilmu kedokteran sangat dibutuhkan [2]", pastikan untuk menyertakan kutipan sumbernya yaitu [2] dalam jawabanmu.
9
+
10
+ 3. **Ketika Tidak Tahu Jawaban**: Jika pertanyaan pengguna tidak dapat dijawab dengan menggunakan tools ini, sampaikan dengan sopan bahwa kamu tidak memiliki jawaban untuk pertanyaan tersebut. Arahkan pengguna untuk mencari informasi lebih lanjut atau bertanya pada ahli di bidang kedokteran.
11
+
12
+ 4. **Gaya Jawaban**: Berikan jawaban dengan gaya yang ramah dan profesional. Hindari penggunaan poin-poin, dan sampaikan informasi secara naratif agar lebih mudah dipahami. Gunakan kata 'dok' atau 'dokter' untuk merujuk pada dokter, dan hindari kesan monoton dengan menambahkan emotikon jika sesuai.
13
+
14
+ 5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊"
15
+ """
16
+
17
+ ADDITIONAL_INFORMATIONS = """
18
+ Kemudian, kamu menjawab pertanyaan user dari buku {titles}, jadi jika user bertanya, pastikan kamu mengacu pada buku tersebut yang didapatkan dari tools yang kamu punya.
19
+ """
20
+
21
+ SYSTEM_TOPIC_TEMPLATE = """
22
+ You are tasked with analyzing a table of contents from a book. Your goal is to identify and extract the main topics and subtopics. Please provide a clear and organized list of these topics and subtopics. The list should reflect the structure and hierarchy presented in the table of contents.
23
+ """
24
+
25
+ USER_TOPIC_TEMPLATE = """
26
+
27
+ **Task:** Analyze the table of contents of a book to identify the main topics and relevant subtopics.
28
+
29
+ **Instructions:**
30
+
31
+ 1. **Main Topics:** Identify the main topics from the table of contents, excluding sections like background, preface, introduction, and references.
32
+ 2. **Subtopics:** For each main topic, list the related subtopics
33
+
34
+ **Output Format:**
35
+
36
+ 1. **Main Topic 1**
37
+ * Subtopic 1
38
+ * Subtopic 2
39
+ * etc.
40
+
41
+ 2. **Main Topic 2**
42
+ * Subtopic 1
43
+ * Subtopic 2
44
+ * etc.
45
+
46
+ **Important Guidelines:**
47
+
48
+ - Include only relevant main topics and subtopics.
49
+ - Ensure the order of topics and subtopics matches the order displayed in the table of contents.
50
+ - Use the correct format and do not include additional information beyond the main topics and subtopics.
51
+ """
52
+
53
+ REFINED_GET_TOPIC_TEMPLATE = """
54
+ Ensure the following topic and subtopic are provided:
55
+
56
+ {topics}
57
+
58
+ Follow this format :
59
+
60
+ 1. **Main topic 1**
61
+ * Subtopic 1
62
+ * Subtopic 2
63
+ * etc
64
+
65
+ 2. **Main topic 2**
66
+ * Subtopic 1
67
+ * Subtopic 2
68
+ * etc
69
+
70
+ etc
71
+
72
+ Do not add any additional text; only use the specified format.
73
+ """
74
+
75
+ ADD_METADATA_TEMPLATE = """
76
+ **Context for Adding Metadata**
77
+
78
+ {context_str}
79
+
80
+ **Context Structure:**
81
+
82
+ 1. **Main Topic 1**
83
+ * Subtopic 1
84
+ * Subtopic 2
85
+ * etc
86
+
87
+ 2. **Main Topic 2**
88
+ * Subtopic 1
89
+ * Subtopic 2
90
+ * etc
91
+
92
+ **Given:**
93
+ - **Topic and Subtopic:** {{extractor_output}}
94
+
95
+ **Role:**
96
+ Your task is to extract and organize metadata for the {class_name}. Follow the instructions below:
97
+
98
+ **Instructions:**
99
+
100
+ 1. **Extract the Main Topic:**
101
+ - **Goal:** Identify the overarching theme or subject from the provided topic and subtopic.
102
+ - **How:** Look for a theme broad enough to encompass the document's primary focus while remaining specific enough to reflect its core purpose.
103
+ - **Tip:** Ensure the main topic is concise yet descriptive, providing a clear understanding of the document’s primary theme. If the content is general or introductory (e.g., background, preface, introduction, references), categorize it accordingly.
104
+
105
+ 2. **Extract the Key Subtopic (if applicable):**
106
+ - **Goal:** Determine the most relevant supporting element related to the main topic.
107
+ - **How:** Identify a sub-element or detail that provides additional depth or clarification to the main topic.
108
+ - **Tip:** Ensure the subtopic directly supports or elaborates on the main topic.
109
+
110
+ 3. **Handle Cases Without a Clear Subtopic:**
111
+ - **Goal:** If no distinct subtopic is present, set the subtopic to mirror the main topic.
112
+ - **How:** In such cases, consider the main topic comprehensive enough to stand alone without additional subtopics.
113
+
114
+ 4. **Record the Extracted Data:**
115
+ - **Goal:** Organize and record the extracted topic and subtopic within the {class_name} class.
116
+ - **How:** Structure the entries clearly and precisely as attributes of the class.
117
+ - **Tip:** Use precise language to capture the relationship between the main topic and subtopic, ensuring clarity and ease of reference for future use.
118
+ """
119
+
120
+ SUMMARIZER_SYSTEM_TEMPLATE = """
121
+
122
+ """
123
+
124
+ SUMMARIER_HUMAN_TEMPLATE = """
125
+
126
+ """
core/tools.py ADDED
File without changes
db/__init__.py ADDED
File without changes
db/database.py ADDED
@@ -0,0 +1,76 @@
1
+ from sqlalchemy import create_engine
2
+ from sqlalchemy.orm import sessionmaker
3
+ from sqlalchemy.exc import OperationalError
4
+ from config import MYSQL_CONFIG
5
+ from fastapi import HTTPException
6
+ import base64
7
+ import os
8
+
9
+ SQLALCHEMY_DATABASE_URL = MYSQL_CONFIG.DB_URI_SQL_ALCHEMY
10
+
11
+ # Get the base64 encoded certificate from the environment variable
12
+ ca_cert_base64 = os.getenv('CA_CERT_BASE64')
13
+
14
+ # Decode the base64 content
15
+ if ca_cert_base64:
16
+ ca_cert_content = base64.b64decode(ca_cert_base64).decode('utf-8')
17
+
18
+ # Write the decoded content to a temporary .pem file
19
+ with open('/tmp/ca.pem', 'w') as f:
20
+ f.write(ca_cert_content)
21
+
22
+ ca_cert_path = '/tmp/ca.pem'
23
+ else:
24
+ raise ValueError("CA_CERT_BASE64 environment variable is not set")
25
+
26
+ # Use the decoded CA certificate in the SQLAlchemy engine
27
+
28
+
29
+ engine = create_engine(
30
+ SQLALCHEMY_DATABASE_URL,
31
+ connect_args={
32
+ "ssl": {
33
+ "sslmode": "REQUIRED",
34
+ "ca": ca_cert_path, # Path to the temporary CA certificate
35
+ # Add other SSL options as needed
36
+ }
37
+ },
38
+ )
39
+
40
+ # engine = create_engine(
41
+ # SQLALCHEMY_DATABASE_URL,
42
+ # connect_args={
43
+ # "ssl": {
44
+ # "sslmode": "REQUIRED",
45
+ # "ca": "ca.pem", # Update this path to your CA certificate
46
+ # # Other SSL options can be added here
47
+ # }
48
+ # },
49
+ # )
50
+
51
+ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
52
+
53
+
54
+ def get_db():
55
+ db = SessionLocal()
56
+ try:
57
+ yield db
58
+ except OperationalError as e:
59
+ # Log the error and raise HTTPException for FastAPI
60
+ print(f"An error occurred in get database sql alchemy.: {e}")
61
+ raise HTTPException(
62
+ status_code=400, detail="Database connection error"
63
+ )
64
+ # Check if it's an authentication-related error
65
+ except Exception as e:
66
+ # Check if it's an authentication-related error
67
+ if "401" in str(e):
68
+ raise HTTPException(
69
+ status_code=401, detail="Authentication failed"
70
+ )
71
+ else:
72
+ # For any other type of exception, raise a generic 400 error
73
+ print(f"An error occurred: {e}")
74
+ raise HTTPException(
75
+ status_code=400, detail="An unexpected error occurred"
76
+ )
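
CA_CERT_BASE64 must hold the base64-encoded PEM; one way to produce the value (a sketch, adjust the path to the actual certificate):

    import base64

    with open("ca.pem", "rb") as f:
        print(base64.b64encode(f.read()).decode("ascii"))
    # Put the printed value into the CA_CERT_BASE64 environment variable.
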
db/db.py ADDED
@@ -0,0 +1,124 @@
1
+ # Experimental
2
+
3
+ from sqlalchemy import Column, String, ForeignKey, DateTime
4
+ from sqlalchemy.dialects.postgresql import UUID, ENUM, JSONB
5
+ from sqlalchemy.orm import relationship
6
+ from sqlalchemy.sql import func
7
+ from enum import Enum
8
+ from sqlalchemy.ext.declarative import as_declarative, declared_attr
9
+ from llama_index.core.callbacks.schema import CBEventType
10
+
11
+
12
+ # Model
13
+ @as_declarative()
14
+ class Base:
15
+ id = Column(UUID, primary_key=True, index=True, default=func.uuid_generate_v4())
16
+ created_at = Column(DateTime, server_default=func.now(), nullable=False)
17
+ updated_at = Column(
18
+ DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
19
+ )
20
+
21
+ __name__: str
22
+
23
+ # Generate __tablename__ automatically
24
+ @declared_attr
25
+ def __tablename__(cls) -> str:
26
+ return cls.__name__.lower()
27
+
28
+ # DB
29
+ class MessageRoleEnum(str, Enum):
30
+ user = "user"
31
+ assistant = "assistant"
32
+
33
+
34
+ class MessageStatusEnum(str, Enum):
35
+ PENDING = "PENDING"
36
+ SUCCESS = "SUCCESS"
37
+ ERROR = "ERROR"
38
+
39
+
40
+ class MessageSubProcessStatusEnum(str, Enum):
41
+ PENDING = "PENDING"
42
+ FINISHED = "FINISHED"
43
+
44
+
45
+ # python doesn't allow enums to be extended, so we have to do this
46
+ additional_message_subprocess_fields = {
47
+ "CONSTRUCTED_QUERY_ENGINE": "constructed_query_engine",
48
+ "SUB_QUESTIONS": "sub_questions",
49
+ }
50
+ MessageSubProcessSourceEnum = Enum(
51
+ "MessageSubProcessSourceEnum",
52
+ [(event_type.name, event_type.value) for event_type in CBEventType]
53
+ + list(additional_message_subprocess_fields.items()),
54
+ )
55
+
56
+
57
+ def to_pg_enum(enum_class) -> ENUM:
58
+ return ENUM(enum_class, name=enum_class.__name__)
59
+
60
+
61
+ class Document(Base):
62
+ """
63
+ A document along with its metadata
64
+ """
65
+
66
+ # URL to the actual document (e.g. a PDF)
67
+ url = Column(String, nullable=False, unique=True)
68
+ metadata_map = Column(JSONB, nullable=True)
69
+ conversations = relationship("ConversationDocument", back_populates="document")
70
+
71
+
72
+ class Conversation(Base):
73
+ """
74
+ A conversation with messages and linked documents
75
+ """
76
+
77
+ messages = relationship("Message", back_populates="conversation")
78
+ conversation_documents = relationship(
79
+ "ConversationDocument", back_populates="conversation"
80
+ )
81
+
82
+
83
+ class ConversationDocument(Base):
84
+ """
85
+ A many-to-many relationship between a conversation and a document
86
+ """
87
+
88
+ conversation_id = Column(
89
+ UUID(as_uuid=True), ForeignKey("conversation.id"), index=True
90
+ )
91
+ document_id = Column(UUID(as_uuid=True), ForeignKey("document.id"), index=True)
92
+ conversation = relationship("Conversation", back_populates="conversation_documents")
93
+ document = relationship("Document", back_populates="conversations")
94
+
95
+
96
+ class Message(Base):
97
+ """
98
+ A message in a conversation
99
+ """
100
+
101
+ conversation_id = Column(
102
+ UUID(as_uuid=True), ForeignKey("conversation.id"), index=True
103
+ )
104
+ content = Column(String)
105
+ role = Column(to_pg_enum(MessageRoleEnum))
106
+ status = Column(to_pg_enum(MessageStatusEnum), default=MessageStatusEnum.PENDING)
107
+ conversation = relationship("Conversation", back_populates="messages")
108
+ sub_processes = relationship("MessageSubProcess", back_populates="message")
109
+
110
+
111
+ class MessageSubProcess(Base):
112
+ """
113
+ A record of a sub-process that occurred as part of the generation of a message from an AI assistant
114
+ """
115
+
116
+ message_id = Column(UUID(as_uuid=True), ForeignKey("message.id"), index=True)
117
+ source = Column(to_pg_enum(MessageSubProcessSourceEnum))
118
+ message = relationship("Message", back_populates="sub_processes")
119
+ status = Column(
120
+ to_pg_enum(MessageSubProcessStatusEnum),
121
+ default=MessageSubProcessStatusEnum.FINISHED,
122
+ nullable=False,
123
+ )
124
+ metadata_map = Column(JSONB, nullable=True)
db/delete_data.py ADDED
@@ -0,0 +1,22 @@
1
+ import logging
2
+ from db.repository import Repository, get_db_conn
3
+
4
+ # Setup logging (configure as needed)
5
+ logging.basicConfig(level=logging.INFO)
6
+
7
+
8
+ class DeleteDatabase(Repository):
9
+ async def delete_record(self, params):
10
+ if "id" not in params:
11
+ raise ValueError("The 'id' parameter is required.")
12
+ query = """
13
+ DELETE FROM metadata
14
+ WHERE id = :id
15
+ """
16
+
17
+ try:
18
+ await self._exec(query, params)
19
+ logging.info(f"Record with id {params['id']} deleted successfully.")
20
+ except Exception as e:
21
+ logging.error(f"Error deleting record with id {params['id']}: {e}")
22
+ raise
db/get_data.py ADDED
@@ -0,0 +1,67 @@
1
+ import logging
2
+ from db.repository import Repository, get_db_conn
3
+
4
+ # Setup logging (configure as needed)
5
+ logging.basicConfig(level=logging.INFO)
6
+
7
+
8
+ class GetDatabase(Repository):
9
+ async def execute_query(self, query, params=None, fetch_one=False):
10
+ """
11
+
12
+ Helper function to execute SQL queries and handle exceptions.
13
+ """
14
+ try:
15
+ print(fetch_one)
16
+ if fetch_one:
17
+
18
+ results = await self._fetch_one(query, params)
19
+ print(results)
20
+ else:
21
+ results = await self.get_by_query(query, params)
22
+ print("result execute query : ", results)
23
+ return results if results else None
24
+ except Exception as e:
25
+ logging.error(f"An error occurred while executing query: {e}")
26
+ return None
27
+
28
+ async def get_data(self, title):
29
+ """
30
+ Fetch the first result matching the given title from the metadata table.
31
+ """
32
+ query = """
33
+ SELECT * FROM metadata
34
+ WHERE title = :title
35
+ LIMIT 5;
36
+ """
37
+
38
+ try:
39
+ results = await self.execute_query(query, {"title": title}, fetch_one=True)
40
+ return results
41
+ except Exception as e:
42
+ logging.error(f"An error occurred while get data: {e}")
43
+ return None
44
+
45
+ async def get_all_data(self):
46
+ """
47
+ Fetch all data from the metadata table.
48
+ """
49
+ query = """
50
+ SELECT * FROM metadata
51
+ """
52
+ results = await self.execute_query(query)
53
+ return results
54
+
55
+ async def get_data_by_id(self, id):
56
+ query = f"""
57
+ SELECT * FROM metadata WHERE id = :id
58
+ """
59
+
60
+ param = {"id" : id}
61
+ try:
62
+ results = await self.execute_query(query, param)
63
+ print(f"Query successful, results: {results}")
64
+ return results[0] if results else None
65
+ except Exception as e:
66
+ print(f"Error fetching data by ID {id}: {e}")
67
+ return None
db/models.py ADDED
@@ -0,0 +1,113 @@
1
+ from typing import List, Literal
2
+ from typing_extensions import Annotated
3
+ from typing import Optional
4
+ import uuid
5
+ import datetime
6
+ import sqlalchemy
7
+ from sqlalchemy.orm import mapped_column, relationship
8
+ from sqlalchemy import Integer, String, ForeignKey, UUID,func, Column, CHAR
9
+ from sqlalchemy.orm import DeclarativeBase, Mapped
10
+
11
+ timestamp_current = Annotated[
12
+ datetime.datetime,
13
+ mapped_column(nullable=False, server_default=func.CURRENT_TIMESTAMP()),
14
+ ]
15
+
16
+ timestamp_update = Annotated[
17
+ datetime.datetime,
18
+ mapped_column(nullable=False, server_default=func.CURRENT_TIMESTAMP(), onupdate=func.CURRENT_TIMESTAMP()),
19
+ ]
20
+
21
+ message_role = Literal["user", "assistant"]
22
+
23
+ class Base(DeclarativeBase):
24
+ type_annotation_map = {
25
+ message_role: sqlalchemy.Enum("user", "assistant", name="message_role"),
26
+ }
27
+
28
+ class User(Base):
29
+ __tablename__ = "user"
30
+
31
+ id = mapped_column(Integer, primary_key=True)
32
+ name = mapped_column(String(100), nullable=False)
33
+ username = mapped_column(String(100), unique=True, nullable=False)
34
+ role_id = mapped_column(Integer, ForeignKey("role.id"))
35
+ email = mapped_column(String(100), unique=True, nullable=False)
36
+ password_hash = mapped_column(String(100), nullable=False)
37
+ created_at: Mapped[timestamp_current]
38
+ updated_at : Mapped[timestamp_update]
39
+
40
+ class Feedback(Base):
41
+ __tablename__ = "feedback"
42
+
43
+ id = mapped_column(Integer, primary_key=True)
44
+ user_id = mapped_column(Integer, ForeignKey("user.id"))
45
+ rating = mapped_column(Integer)
46
+ comment = mapped_column(String(1000))
47
+ created_at : Mapped[timestamp_current]
48
+
49
+ class Role(Base):
50
+ __tablename__ = "role"
51
+
52
+ id = mapped_column(Integer, primary_key=True)
53
+ role_name = mapped_column(String(200), nullable=False)
54
+ description = mapped_column(String(200))
55
+
56
+ class User_Role(Base):
57
+ __tablename__ = "user_role"
58
+
59
+ id = mapped_column(Integer, primary_key=True)
60
+ user_id = mapped_column(Integer, ForeignKey("user.id"))
61
+ role_id = mapped_column(Integer, ForeignKey("role.id"))
62
+
63
+ class Bot(Base):
64
+ __tablename__ = "bot"
65
+
66
+ id = mapped_column(Integer, primary_key=True)
67
+ user_id = mapped_column(Integer, ForeignKey("user.id"))
68
+ bot_name = mapped_column(String(200), nullable=False)
69
+ created_at : Mapped[timestamp_current]
70
+
71
+ class Session(Base):
72
+ __tablename__ = "session"
73
+
74
+ id = mapped_column(String(36), primary_key=True, index=True, default=lambda: str(uuid.uuid4())) # Store as string
75
+ user_id = mapped_column(Integer, ForeignKey("user.id"))
76
+ bot_id = mapped_column(Integer, ForeignKey("bot.id"))
77
+ created_at : Mapped[timestamp_current]
78
+
79
+ class Message(Base):
80
+ __tablename__ = "message"
81
+
82
+ id = mapped_column(Integer, primary_key=True)
83
+ session_id = mapped_column(String(36), ForeignKey("session.id"), nullable=False) # Store as string
84
+ role : Mapped[message_role]
85
+ goal = mapped_column(String(200))
86
+ created_at : Mapped[timestamp_current]
87
+
88
+ class Category(Base):
89
+ __tablename__ = "category"
90
+
91
+ id = mapped_column(Integer, primary_key=True)
92
+ category = mapped_column(String(200))
93
+ created_at : Mapped[timestamp_current]
94
+
95
+ class Metadata(Base):
96
+ __tablename__ = "metadata"
97
+
98
+ id = mapped_column(Integer, primary_key=True)
99
+ title = mapped_column(String(200))
100
+ category_id = mapped_column(Integer, ForeignKey("category.id"))
101
+ author = mapped_column(String(200))
102
+ year = mapped_column(Integer)
103
+ publisher = mapped_column(String(100))
104
+ created_at : Mapped[timestamp_current]
105
+ updated_at : Mapped[timestamp_update]
106
+
107
+
108
+ class Bot_Meta(Base):
109
+ __tablename__ = "bot_meta"
110
+
111
+ id = mapped_column(Integer, primary_key=True)
112
+ bot_id = mapped_column(Integer, ForeignKey("bot.id"))
113
+ metadata_id = mapped_column(Integer, ForeignKey("metadata.id"))
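
No migration tool appears in this commit, so the tables presumably have to exist already; if they need to be created directly from these models, a sketch (requires the same environment as db/database.py):

    from db.database import engine
    from db.models import Base

    Base.metadata.create_all(bind=engine)  # creates user, role, bot, session, message, category, metadata, ...
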
db/repository.py ADDED
@@ -0,0 +1,36 @@
1
+ from databases import Database
2
+ import datetime
3
+
4
+
5
+ def get_db_conn(config):
6
+ db_url = f"{config.DB_URI}"
7
+ return Database(db_url)
8
+
9
+
10
+ class Repository:
11
+ def __init__(self, db_conn):
12
+ self.db_conn = db_conn
13
+
14
+ async def get_by_query(self, query, param):
15
+ results = await self.db_conn.fetch_all(query, param)
16
+ print("result get _by query", results)
17
+ return [dict(result) for result in results]
18
+
19
+ async def _fetch_one(self, query, param):
20
+ result = await self.db_conn.fetch_one(query, param)
21
+ return dict(result) if result is not None else result
22
+
23
+ async def _exec(self, query, param):
24
+ return await self.db_conn.execute(query, param)
25
+
26
+ async def _exec_many(self, query, params):
27
+ return await self.db_conn.execute_many(query, params)
28
+
29
+ def update_params(self, params, update=False):
30
+ current_time = datetime.datetime.now()
31
+ if not update:
32
+
33
+ params.update({"createdAt": current_time, "updatedAt": current_time})
34
+ else:
35
+ params.update({"updatedAt": current_time})
36
+ return params
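
A sketch of using the repository layer outside FastAPI (the `databases` connection is opened explicitly here; inside the app that is presumably handled by api.events.register_events, which is not part of this diff):

    import asyncio

    from config import MYSQL_CONFIG
    from db.repository import get_db_conn
    from db.get_data import GetDatabase

    async def main():
        db_conn = get_db_conn(MYSQL_CONFIG)
        await db_conn.connect()
        rows = await GetDatabase(db_conn).get_all_data()
        print(rows)
        await db_conn.disconnect()

    asyncio.run(main())
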
db/save_data.py ADDED
@@ -0,0 +1,39 @@
1
+ from databases import Database
2
+ import logging
3
+ from dotenv import load_dotenv
4
+ from db.repository import Repository
5
+
6
+
7
+ load_dotenv()
8
+
9
+
10
+ class InsertDatabase(Repository):
11
+
12
+ # Example function to insert data asynchronously
13
+ async def insert_data(self, params, category_id):
14
+ # SQL insert query with named placeholders
15
+ query = """
16
+ INSERT INTO metadata (title, category_id, author, year, publisher)
17
+ VALUES (:title, :category_id, :author, :year, :publisher)
18
+ """
19
+
20
+ reference = {
21
+ "title": params["title"],
22
+ "category_id": category_id, # directly assign category_id
23
+ "author": params["author"],
24
+ "year": params["year"],
25
+ "publisher": params["publisher"]
26
+ }
27
+
28
+
29
+ print(reference)
30
+ try:
31
+ # Execute the query with the provided values
32
+ await self._exec(query, reference)
33
+ logging.info(
34
+ f"Data inserted successfully: {reference['title']}, {reference['author']}"
35
+ )
36
+ except Exception as e:
37
+ # Log any errors that occur during the database insert operation
38
+ logging.error(f"Failed to insert data: {e}")
39
+ raise # Re-raise the exception to allow further handling if needed
db/update_data.py ADDED
@@ -0,0 +1,35 @@
1
+ import logging
2
+ from db.repository import Repository, get_db_conn
3
+
4
+ # Setup logging (configure as needed)
5
+ logging.basicConfig(level=logging.INFO)
6
+
7
+
8
+ class UpdateDatabase(Repository):
9
+ async def update_record(self, reference):
10
+ print("update record", reference)
11
+ if "id" not in reference:
12
+ raise ValueError("The 'id' parameter is required.")
13
+ query = """
14
+ UPDATE metadata
15
+ SET title = :title,
16
+ category_id = :category_id,
17
+ author = :author,
18
+ year = :year,
19
+ publisher = :publisher
20
+ WHERE id = :id
21
+ """
22
+ print(query)
23
+
24
+ print(reference)
25
+
26
+ try:
27
+ await self._exec(query, reference)
28
+ logging.info(
29
+ f"Record with id {reference['id']} updated successfully."
30
+ )
31
+ except Exception as e:
32
+ logging.error(
33
+ f"Error updating record with id {reference['id']}: {e}"
34
+ )
35
+ raise
docker-compose.yml ADDED
@@ -0,0 +1,8 @@
1
+ version: "3.10"
2
+ services:
3
+ fastapi_app:
4
+ build: .
5
+ ports:
6
+ - "8000:8000"
7
+ env_file:
8
+ - .env
requirements.txt ADDED
@@ -0,0 +1,160 @@
1
+ aiohappyeyeballs==2.4.0
2
+ aiohttp==3.10.5
3
+ aiomysql==0.2.0
4
+ aiosignal==1.3.1
5
+ annotated-types==0.7.0
6
+ anyio==4.4.0
7
+ asgiref==3.8.1
8
+ attrs==24.2.0
9
+ backoff==2.2.1
10
+ bcrypt==4.2.0
11
+ beautifulsoup4==4.12.3
12
+ build==1.2.2
13
+ cachetools==5.5.0
14
+ certifi==2024.8.30
15
+ chardet==5.2.0
16
+ charset-normalizer==3.3.2
17
+ chroma-hnswlib==0.7.6
18
+ chromadb==0.5.7
19
+ click==8.1.7
20
+ coloredlogs==15.0.1
21
+ databases==0.9.0
22
+ dataclasses-json==0.6.7
23
+ Deprecated==1.2.14
24
+ dirtyjson==1.0.8
25
+ distro==1.9.0
26
+ dnspython==1.16.0
27
+ fastapi==0.113.0
28
+ filelock==3.16.1
29
+ flatbuffers==24.3.25
30
+ frozenlist==1.4.1
31
+ fsspec==2024.9.0
32
+ google-auth==2.34.0
33
+ googleapis-common-protos==1.65.0
34
+ greenlet==3.0.3
35
+ grpcio==1.66.1
36
+ h11==0.14.0
37
+ httpcore==1.0.5
38
+ httptools==0.6.1
39
+ httpx==0.27.2
40
+ huggingface-hub==0.25.0
41
+ humanfriendly==10.0
42
+ idna==3.8
43
+ importlib_metadata==8.4.0
44
+ importlib_resources==6.4.5
45
+ Jinja2==3.1.4
46
+ jiter==0.5.0
47
+ joblib==1.4.2
48
+ jsonpatch==1.33
49
+ jsonpointer==3.0.0
50
+ kubernetes==30.1.0
51
+ langchain==0.3.0
52
+ langchain-community==0.3.0
53
+ langchain-core==0.3.1
54
+ langchain-openai==0.2.0
55
+ langchain-text-splitters==0.3.0
56
+ langchainhub==0.1.21
57
+ langfuse==2.48.1
58
+ langsmith==0.1.123
59
+ llama-cloud==0.0.17
60
+ llama-index==0.11.10
61
+ llama-index-agent-openai==0.3.1
62
+ llama-index-callbacks-langfuse==0.2.0
63
+ llama-index-cli==0.3.1
64
+ llama-index-core==0.11.10
65
+ llama-index-embeddings-openai==0.2.4
66
+ llama-index-indices-managed-llama-cloud==0.3.0
67
+ llama-index-legacy==0.9.48.post3
68
+ llama-index-llms-openai==0.2.7
69
+ llama-index-multi-modal-llms-openai==0.2.0
70
+ llama-index-program-openai==0.2.0
71
+ llama-index-question-gen-openai==0.2.0
72
+ llama-index-readers-file==0.2.1
73
+ llama-index-readers-llama-parse==0.3.0
74
+ llama-index-storage-chat-store-redis==0.2.0
75
+ llama-index-vector-stores-pinecone==0.2.1
76
+ llama-parse==0.5.2
77
+ markdown-it-py==3.0.0
78
+ MarkupSafe==2.1.5
79
+ marshmallow==3.22.0
80
+ mdurl==0.1.2
81
+ mmh3==5.0.0
82
+ monotonic==1.6
83
+ mpmath==1.3.0
84
+ multidict==6.0.5
85
+ mypy-extensions==1.0.0
86
+ mysqlclient==2.2.4
87
+ nest_asyncio==1.6.0
88
+ networkx==3.3
89
+ nltk==3.9.1
90
+ numpy==1.26.4
91
+ oauthlib==3.2.2
92
+ onnxruntime==1.19.2
93
+ openai==1.43.1
94
+ opentelemetry-api==1.27.0
95
+ opentelemetry-exporter-otlp-proto-common==1.27.0
96
+ opentelemetry-exporter-otlp-proto-grpc==1.27.0
97
+ opentelemetry-instrumentation==0.48b0
98
+ opentelemetry-instrumentation-asgi==0.48b0
99
+ opentelemetry-instrumentation-fastapi==0.48b0
100
+ opentelemetry-proto==1.27.0
101
+ opentelemetry-sdk==1.27.0
102
+ opentelemetry-semantic-conventions==0.48b0
103
+ opentelemetry-util-http==0.48b0
104
+ orjson==3.10.7
105
+ overrides==7.7.0
106
+ pandas==2.2.2
107
+ pillow==10.4.0
108
+ pinecone-client==5.0.1
109
+ pinecone-plugin-inference==1.0.3
110
+ pinecone-plugin-interface==0.0.7
111
+ posthog==3.6.6
112
+ protobuf==4.25.5
113
+ protoc-gen-openapiv2==0.0.1
114
+ pyasn1==0.6.1
115
+ pyasn1_modules==0.4.1
116
+ pydantic==2.9.0
117
+ pydantic-settings==2.4.0
118
+ pydantic_core==2.23.2
119
+ pymongo==3.11.0
120
+ PyMuPDF==1.24.10
121
+ PyMuPDFb==1.24.10
122
+ PyMySQL==1.1.1
123
+ pypdf==4.3.1
124
+ PyPDF2==3.0.1
125
+ PyPika==0.48.9
126
+ pyproject_hooks==1.1.0
127
+ pyreadline3==3.5.4
128
+ python-dotenv==1.0.1
129
+ python-multipart==0.0.9
130
+ pytz==2024.1
131
+ PyYAML==6.0.2
132
+ redis==5.0.8
133
+ regex==2024.7.24
134
+ requests==2.32.3
135
+ requests-oauthlib==2.0.0
136
+ rich==13.8.1
137
+ rsa==4.9
138
+ shellingham==1.5.4
139
+ sniffio==1.3.1
140
+ soupsieve==2.6
141
+ SQLAlchemy==2.0.34
142
+ sse-starlette==2.1.3
143
+ starlette==0.38.4
144
+ striprtf==0.0.26
145
+ sympy==1.13.3
146
+ tenacity==8.5.0
147
+ tiktoken==0.7.0
148
+ tokenizers==0.20.0
149
+ tqdm==4.66.5
150
+ typer==0.12.5
151
+ types-requests==2.32.0.20240914
152
+ typing-inspect==0.9.0
153
+ tzdata==2024.1
154
+ urllib3==2.2.2
155
+ uvicorn==0.30.6
156
+ watchfiles==0.24.0
157
+ websocket-client==1.8.0
158
+ websockets==13.0.1
159
+ wrapt==1.16.0
160
+ yarl==1.9.11
script/__init__.py ADDED
File without changes
script/document_uploader.py ADDED
@@ -0,0 +1,116 @@
+ from llama_index.core.ingestion import IngestionPipeline
+ from llama_index.core.extractors import PydanticProgramExtractor
+ from llama_index.embeddings.openai import OpenAIEmbedding
+ from config import PINECONE_CONFIG
+ from pinecone.grpc import PineconeGRPC as Pinecone
+ from service.reader import Reader
+ from script.get_metadata import Metadata
+ from fastapi import UploadFile, HTTPException, status
+
+ from llama_index.core.node_parser import (
+     SentenceSplitter,
+     SemanticSplitterNodeParser,
+ )
+
+ # from script.get_topic import extract_topic
+
+ import logging
+ import random
+
+
+ class Uploader:
+     # def __init__(self, reference, file: UploadFile, content_table: UploadFile):
+     def __init__(self, reference, file: UploadFile):
+         self.file = file
+         # self.content_table = content_table
+         self.reader = Reader()
+         self.reference = reference
+         self.metadata = Metadata(reference)
+
+     async def ingest_documents(self, file: UploadFile):
+         """Load documents from the storage path."""
+         documents = await self.reader.read_from_uploadfile(file)
+         print("Number of documents:", len(documents))
+         print("Documents successfully ingested.")
+
+         return documents
+
+     def check_existing_metadata(self, pinecone_index, title, random_vector):
+         try:
+             result = pinecone_index.query(
+                 vector=random_vector,
+                 top_k=1,
+                 filter={
+                     "title": {"$eq": title},
+                 },
+             )
+             return result["matches"]
+         except Exception as e:
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=f"Error checking existing metadata: {str(e)}",
+             )
+
+     async def process_documents(self):
+         # Ingest documents
+         print("test")
+         documents = await self.ingest_documents(self.file)
+         print("documents")
+
+         # topic_extractor = extract_topic(self.reference, self.content_table)
+
+         embed_model = OpenAIEmbedding()
+
+         # Get metadata
+         documents_with_metadata = self.metadata.apply_metadata(documents)
+         print("documents_with_metadata")
+
+         # document_filtered = self.filter_document(documents_with_metadata)
+
+         # Set up the ingestion pipeline
+         pipeline = IngestionPipeline(
+             transformations=[
+                 SemanticSplitterNodeParser(
+                     buffer_size=1,
+                     breakpoint_percentile_threshold=95,
+                     embed_model=embed_model,
+                 ),
+                 # topic_extractor,
+             ]
+         )
+
+         # splitter = SemanticSplitterNodeParser(
+         #     buffer_size=1, breakpoint_percentile_threshold=95, embed_model=embed_model
+         # )
+
+         # Run the pipeline
+         try:
+             nodes_with_metadata = pipeline.run(documents=documents_with_metadata)
+             # nodes_with_metadata = splitter.get_nodes_from_documents(documents_with_metadata)
+             print("Pipeline processing completed.")
+             return nodes_with_metadata
+         except Exception as e:
+             # Log the error and raise HTTPException for FastAPI
+             logging.error(f"An error occurred while running the pipeline: {e}")
+             raise HTTPException(
+                 status_code=500,
+                 detail="An internal server error occurred while running the pipeline.",
+             )
+
+     def filter_document(self, documents):
+         api_key = PINECONE_CONFIG.PINECONE_API_KEY
+         client = Pinecone(api_key=api_key)
+         pinecone_index = client.Index("test")
+
+         random_vector = [random.uniform(0, 1) for _ in range(1536)]
+
+         filtered_documents = []
+         for doc in documents:
+             result = self.check_existing_metadata(
+                 pinecone_index, doc.metadata["title"], random_vector
+             )
+
+             if len(result) == 0:
+                 filtered_documents.append(doc)
+
+         return filtered_documents
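Taken together, `Uploader` reads an uploaded file, stamps every page with reference metadata, and splits it into semantically chunked nodes. A minimal sketch of how it might be driven from a FastAPI route; the route path and the reference payload here are illustrative, not part of this commit:

    from fastapi import FastAPI, UploadFile
    from script.document_uploader import Uploader

    app = FastAPI()

    @app.post("/upload")
    async def upload(file: UploadFile):
        # Illustrative reference dict; the real values would come from the request or database.
        reference = {
            "title": "Example Book",
            "author": "Jane Doe",
            "category": "medicine",
            "year": 2023,
            "publisher": "Example Press",
        }
        uploader = Uploader(reference, file)
        nodes = await uploader.process_documents()
        return {"nodes": len(nodes)}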
script/get_metadata.py ADDED
@@ -0,0 +1,42 @@
+ # Get reference
+
+
+ class Metadata:
+     def __init__(self, reference):
+         self.reference = reference
+
+     def add_metadata(self, documents, metadata):
+         """Add metadata to each document and include page number."""
+         print("Number of documents:", len(documents))
+         for page_number, document in enumerate(documents, start=1):
+             # Ensure the document has a metadata attribute
+             if not hasattr(document, "metadata") or document.metadata is None:
+                 document.metadata = {}
+
+             # Update metadata with page number
+             document.metadata["page"] = page_number
+             document.metadata.update(metadata)
+
+             print(f"Metadata added to page {page_number}")
+             # self.logger.log_action(f"Metadata added to document {document.id_}", action_type="METADATA")
+
+         return documents
+
+     def _generate_metadata(self):
+         """Generate metadata and return it."""
+         metadata = {
+             "title": self.reference["title"],
+             "author": self.reference["author"],
+             "category": self.reference["category"],
+             "year": self.reference["year"],
+             "publisher": self.reference["publisher"],
+             "reference": f"{self.reference['author']}. ({self.reference['year']}). *{self.reference['title']}*. {self.reference['publisher']}.",  # APA-style reference
+         }
+         print("metadata is generated")
+         return metadata
+
+     def apply_metadata(self, documents):
+         """Apply generated metadata to documents."""
+         metadata = self._generate_metadata()
+         print("metadata is applied")
+         return self.add_metadata(documents, metadata)
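`Metadata` simply merges one generated dict into every page's metadata and records the page number. A minimal standalone sketch, using llama-index `Document` objects as stand-ins for the pages produced by the reader (the reference values are illustrative):

    from llama_index.core import Document
    from script.get_metadata import Metadata

    reference = {
        "title": "Example Book",
        "author": "Jane Doe",
        "category": "medicine",
        "year": 2023,
        "publisher": "Example Press",
    }
    pages = [Document(text="page one"), Document(text="page two")]
    pages = Metadata(reference).apply_metadata(pages)
    # Each page now carries its page number plus the shared reference fields.
    print(pages[0].metadata["page"], pages[0].metadata["reference"])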
script/get_topic.py ADDED
@@ -0,0 +1,82 @@
+ import nest_asyncio
+ import os
+ from dotenv import load_dotenv
+ from jinja2 import Template
+ from pydantic import BaseModel, Field
+ from pymongo.mongo_client import MongoClient
+
+ from llama_index.program.openai import OpenAIPydanticProgram
+ from llama_index.core.extractors import PydanticProgramExtractor
+ from llama_index.llms.openai import OpenAI
+
+ from core.prompt import ADD_METADATA_TEMPLATE
+ from core.summarization.summarizer import SummarizeGenerator
+
+ nest_asyncio.apply()
+
+ load_dotenv()
+
+
+ class NodeMetadata(BaseModel):
+     """Metadata for nodes, capturing topic and subtopic from the book."""
+
+     topic: str = Field(
+         ...,
+         description="The main subject or category that the node is associated with, representing a broad theme within the book.",
+     )
+     subtopic: str = Field(
+         ...,
+         description="A more specific aspect or section under the main topic, refining the context of the node within the book.",
+     )
+
+
+ def extract_topic(references, content_table):
+     uri = os.getenv("MONGO_URI")
+     client = MongoClient(uri)
+
+     try:
+         client.admin.command('ping')
+         print("Pinged your deployment. You successfully connected to MongoDB!")
+     except Exception as e:
+         print(e)
+     # Access a specific database
+     db = client["summarizer"]
+
+     # Access a collection within the database
+     collection = db["topic_collection"]
+
+     generate_content_table = SummarizeGenerator(references)
+     extractor_output, extractor_dics = generate_content_table.extract_content_table(content_table)
+     print(extractor_output)
+     data_to_insert = {
+         "title": references["title"],
+         **extractor_dics,  # Unpack the extractor metadata dictionary
+     }
+
+     collection.insert_one(data_to_insert)
+
+
+     add_metadata_template = str(
+         Template(ADD_METADATA_TEMPLATE).render(extractor_output=extractor_output)
+     )
+
+     print("add metadata template:", add_metadata_template)
+
+     llm = OpenAI(temperature=0.1, model="gpt-4o-mini")
+
+     openai_program = OpenAIPydanticProgram.from_defaults(
+         output_cls=NodeMetadata,
+         prompt_template_str="{input}",
+         extract_template_str=add_metadata_template,
+         llm=llm,
+     )
+
+     topic_extractor = PydanticProgramExtractor(
+         program=openai_program,
+         input_key="input",
+         show_progress=True,
+         extract_template_str=add_metadata_template,
+         llm=llm,
+     )
+
+     return topic_extractor
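`extract_topic` returns a `PydanticProgramExtractor`, which is designed to be dropped into the same `IngestionPipeline` that `Uploader` builds (the call is currently commented out there). A hedged sketch of that wiring; `references`, `content_table`, and `documents_with_metadata` are assumed to exist, as they do elsewhere in this commit:

    from llama_index.core.ingestion import IngestionPipeline
    from llama_index.core.node_parser import SentenceSplitter
    from script.get_topic import extract_topic

    # Assumptions: a reference dict and an uploaded table-of-contents file.
    topic_extractor = extract_topic(references, content_table)

    pipeline = IngestionPipeline(
        transformations=[SentenceSplitter(chunk_size=512), topic_extractor]
    )
    # Each resulting node gains "topic" and "subtopic" metadata fields.
    nodes = pipeline.run(documents=documents_with_metadata)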
script/vector_db.py ADDED
@@ -0,0 +1,157 @@
+ from llama_index.core import VectorStoreIndex
+ from llama_index.core import StorageContext
+ from pinecone import Pinecone, ServerlessSpec
+ from llama_index.llms.openai import OpenAI
+ from llama_index.vector_stores.pinecone import PineconeVectorStore
+ from fastapi import HTTPException, status
+ from config import PINECONE_CONFIG
+ from math import ceil
+ import numpy as np
+ import os
+ import json
+
+
+ class IndexManager:
+     def __init__(self, index_name: str = "summarizer-semantic-index"):
+         self.vector_index = None
+         self.index_name = index_name
+         self.client = self._get_pinecone_client()
+         self.pinecone_index = self._create_pinecone_index()
+
+     def _get_pinecone_client(self):
+         """Initialize and return the Pinecone client."""
+         # api_key = os.getenv("PINECONE_API_KEY")
+         api_key = PINECONE_CONFIG.PINECONE_API_KEY
+         if not api_key:
+             raise ValueError(
+                 "Pinecone API key is missing. Please set it in environment variables."
+             )
+         return Pinecone(api_key=api_key)
+
+     def _create_pinecone_index(self):
+         """Create Pinecone index if it doesn't already exist."""
+         if self.index_name not in self.client.list_indexes().names():
+             self.client.create_index(
+                 name=self.index_name,
+                 dimension=1536,
+                 metric="cosine",
+                 spec=ServerlessSpec(cloud="aws", region="us-east-1"),
+             )
+         return self.client.Index(self.index_name)
+
+     def _initialize_vector_store(self) -> StorageContext:
+         """Initialize and return the vector store with the Pinecone index."""
+         vector_store = PineconeVectorStore(pinecone_index=self.pinecone_index)
+         return StorageContext.from_defaults(vector_store=vector_store)
+
+
+     def build_indexes(self, nodes):
+         """Build vector and tree indexes from nodes."""
+         try:
+             storage_context = self._initialize_vector_store()
+             self.vector_index = VectorStoreIndex(nodes, storage_context=storage_context)
+             self.vector_index.set_index_id("vector")
+
+             print(f"Vector Index ID: {self.vector_index.index_id}")
+             print("Vector Index created successfully.")
+
+             return json.dumps({"status": "success", "message": "Vector Index created successfully."})
+
+         except HTTPException as http_exc:
+             raise http_exc  # Re-raise HTTPExceptions to ensure FastAPI handles them
+         except Exception as e:
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=f"Error building indexes: {str(e)}"
+             )
+
+     def get_ids_from_query(self, input_vector, title):
+         print("Searching Pinecone...")
+         print(title)
+
+         new_ids = set()  # Initialize new_ids outside the loop
+
+         while True:
+             results = self.pinecone_index.query(
+                 vector=input_vector,
+                 top_k=10000,
+                 filter={
+                     "title": {"$eq": f"{title}"},
+                 },
+             )
+
+             ids = set()
+             for result in results['matches']:
+                 ids.add(result['id'])
+             # Stop once the query returns no ids we have not already seen
+             if ids.issubset(new_ids):
+                 break
+             else:
+                 new_ids.update(ids)  # Add all new ids to new_ids
+
+         return new_ids
+
+
+     def get_all_ids_from_index(self, title):
+         num_dimensions = 1536
+
+         num_vectors = self.pinecone_index.describe_index_stats(
+         )["total_vector_count"]
+
+         print("Length of ids list is shorter than the number of total vectors...")
+         input_vector = np.random.rand(num_dimensions).tolist()
+         print("creating random vector...")
+         ids = self.get_ids_from_query(input_vector, title)
+         print("getting ids from a vector query...")
+
+         print("updating ids set...")
+         print(f"Collected {len(ids)} ids out of {num_vectors}.")
+
+         return ids
+
+     def delete_vector_database(self, old_reference):
+         try:
+             batch_size = 1000
+             all_ids = self.get_all_ids_from_index(old_reference['title'])
+             all_ids = list(all_ids)
+
+             # Split ids into chunks of batch_size
+             num_batches = ceil(len(all_ids) / batch_size)
+
+             for i in range(num_batches):
+                 # Fetch a batch of IDs
+                 batch_ids = all_ids[i * batch_size: (i + 1) * batch_size]
+                 self.pinecone_index.delete(ids=batch_ids)
+                 print(f"Deleted ids {i * batch_size} to {(i + 1) * batch_size} successfully")
+         except Exception as e:
+             print(e)
+             raise HTTPException(status_code=500, detail="An error occurred while deleting metadata")
+
+     def update_vector_database(self, old_reference, new_reference):
+
+         reference = new_reference.model_dump()
+
+         all_ids = self.get_all_ids_from_index(old_reference['title'])
+         all_ids = list(all_ids)
+
+         for id in all_ids:
+             self.pinecone_index.update(
+                 id=id,
+                 set_metadata=reference
+             )
+
+
+     def load_existing_indexes(self):
+         """Load existing indexes from Pinecone."""
+         try:
+             client = self._get_pinecone_client()
+             pinecone_index = client.Index(self.index_name)
+
+             vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
+             retriever = VectorStoreIndex.from_vector_store(vector_store)
+
+             print("Existing Vector Index loaded successfully.")
+             return retriever
+         except Exception as e:
+             print(f"Error loading existing indexes: {e}")
+             raise
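`IndexManager` owns the Pinecone index lifecycle: creating the index, upserting nodes, and rewriting or deleting vectors when a reference changes. A minimal sketch of the happy path, assuming `nodes_with_metadata` comes from `Uploader.process_documents()` and that OpenAI and Pinecone credentials are configured:

    from script.vector_db import IndexManager

    index_manager = IndexManager()

    # Upsert freshly processed nodes into the "summarizer-semantic-index" index.
    index_manager.build_indexes(nodes_with_metadata)

    # Later, reuse the stored vectors without re-ingesting documents.
    index = index_manager.load_existing_indexes()
    query_engine = index.as_query_engine()
    print(query_engine.query("What does chapter one cover?"))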
service/__init__.py ADDED
File without changes