Christopher Secccafico committed
Commit 9e80d00 • 1 Parent(s): 4a87a8c
Upload 9 files
- MVP.py +0 -0
- data.txt +7 -0
- data_loader.py +7 -0
- embed initializer.py +10 -0
- full run.py +57 -0
- initializer2.py +7 -0
- run_ollama.sh +9 -0
- setup.py +14 -0
- tester.py +6 -0
MVP.py
ADDED
File without changes
data.txt
ADDED
@@ -0,0 +1,7 @@
+# data.txt
+
+19 Dec - Go to the movie
+20 Dec - Have a dinner with family
+21 Dec - Go to the birthday party
+22 Dec - Go to the dentist
+23 Dec - Finish writing a draft for blog post
data_loader.py
ADDED
@@ -0,0 +1,7 @@
+from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
+
+# Load data from a specified directory and file
+document = SimpleDirectoryReader(input_files=['/path/to/data.txt']).load_data()
+
+# Process data (chunking, embedding, indexing) and store them
+index = VectorStoreIndex.from_documents(document)
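The loader above only builds the index. A minimal sketch of how it could then be queried, assuming an LLM and embedding model have already been configured on Settings (as in setup.py later in this commit); the question text is made up:

# Hypothetical follow-up to data_loader.py: query the index built above.
query_engine = index.as_query_engine()
response = query_engine.query("What is planned for 21 Dec?")
print(response)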
embed initializer.py
ADDED
@@ -0,0 +1,10 @@
+from llama_index.core.indices.prompt_helper import PromptHelper
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core import Settings
+# Initialize PromptHelper and SentenceSplitter
+prompt_helper = PromptHelper(context_window=2048)
+node_parser = SentenceSplitter(chunk_size=300, chunk_overlap=20)
+
+# Update Settings with helpers
+Settings.prompt_helper = prompt_helper
+Settings.node_parser = node_parser
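To see what chunk_size=300 and chunk_overlap=20 actually produce, the configured splitter can be run against a sample document; the sample text below is made up, and get_nodes_from_documents is the standard llama_index node-parser entry point:

# Hypothetical check of the splitter configured above (sample text is made up).
from llama_index.core import Document

sample = Document(text="19 Dec - Go to the movie. 20 Dec - Have dinner with family.")
nodes = node_parser.get_nodes_from_documents([sample])
print(len(nodes), "chunk(s)")
for node in nodes:
    print(node.get_content())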
full run.py
ADDED
@@ -0,0 +1,57 @@
+import os
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.llms import Ollama
+from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
+from llama_index.core.indices.prompt_helper import PromptHelper
+from llama_index.core.node_parser import SentenceSplitter
+
+
+def setup_environment():
+    # Set environment variables
+    os.environ['llm'] = 'tinyllama'
+
+
+def initialize_settings():
+    # Initialize and configure the LLM and embedding model
+    llm_model = os.getenv('llm')
+    llm = Ollama(model=llm_model)
+    embed_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
+
+    # Initialize prompt helper and sentence splitter
+    prompt_helper = PromptHelper(context_window=2048)
+    node_parser = SentenceSplitter(chunk_size=300, chunk_overlap=20)
+
+    # Configure global settings for application
+    Settings.llm = llm
+    Settings.embed_model = embed_model
+    Settings.prompt_helper = prompt_helper
+    Settings.node_parser = node_parser
+
+
+def load_and_index_data(file_path):
+    # Load data from the specified file path
+    document = SimpleDirectoryReader(input_files=[file_path]).load_data()
+
+    # Process data (chunking, embedding, indexing) and store them in a vector store index
+    return VectorStoreIndex.from_documents(document)
+
+
+def query_data(query_engine, query):
+    # Query the indexed data and return the response
+    return query_engine.query(query)
+
+
+if __name__ == "__main__":
+    setup_environment()
+    initialize_settings()
+
+    # Assuming the file is stored at a specified path
+    file_path = 'data.txt'
+    index = load_and_index_data(file_path)
+
+    # Build a query engine from the index
+    query_engine = index.as_query_engine()
+
+    # Example query
+    response = query_data(query_engine, 'show me my calendar dates.')
+    print(response)
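Note: this script assumes a local Ollama server is running and that the tinyllama model has already been pulled (run_ollama.sh below shows one way to install and start Ollama); the BAAI/bge-small-en-v1.5 embedding model is downloaded from Hugging Face the first time it is used.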
initializer2.py
ADDED
@@ -0,0 +1,7 @@
+from llama_index.core import Settings
+
+# Configure global settings for the app so that the VectorStoreIndex and QueryEngine use these components by default.
+Settings.llm = llm
+Settings.embed_model = embed_model
+Settings.prompt_helper = prompt_helper
+Settings.node_parser = node_parser
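As committed, this snippet references llm, embed_model, prompt_helper and node_parser without defining them, so it only runs after objects like those created in setup.py and embed initializer.py exist. One way those names could be defined first, drawn from the other files in this commit:

# Definitions assumed by initializer2.py, taken from setup.py and embed initializer.py.
from langchain_community.llms import Ollama
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core.indices.prompt_helper import PromptHelper
from llama_index.core.node_parser import SentenceSplitter

llm = Ollama(model='tinyllama')
embed_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
prompt_helper = PromptHelper(context_window=2048)
node_parser = SentenceSplitter(chunk_size=300, chunk_overlap=20)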
run_ollama.sh
ADDED
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# This script first installs ollama and then runs it with the 'mistral' model.
+
+# Download and execute the installation script
+curl -fsSL https://ollama.com/install.sh | sh
+
+# Run ollama with the 'mistral' model
+ollama run mistral
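Note that this script runs the mistral model, while the Python files in this commit are configured for tinyllama; if they are meant to be used together, pulling tinyllama as well (ollama pull tinyllama) or aligning the model names would presumably be needed.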
setup.py
ADDED
@@ -0,0 +1,14 @@
+from langchain_community.llms import Ollama
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from llama_index.core import Settings
+
+# Create an Ollama instance with the model configuration
+llm = Ollama(model='tinyllama')
+
+# Use the llm instance directly where needed
+Settings.llm = llm
+
+# Configure the embedding model for your settings
+Settings.embed_model = HuggingFaceEmbeddings(
+    model_name="BAAI/bge-small-en-v1.5"
+)
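setup.py mixes LangChain classes with llama_index's Settings. A llama_index-native sketch of the same configuration, assuming the optional llama-index-llms-ollama and llama-index-embeddings-huggingface packages are installed:

# Hypothetical llama_index-native variant of setup.py (requires the optional
# llama-index-llms-ollama and llama-index-embeddings-huggingface packages).
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

Settings.llm = Ollama(model='tinyllama')
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")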
tester.py
ADDED
@@ -0,0 +1,6 @@
+# Build a query engine from the index
+query_engine = index.as_query_engine()
+
+# Execute a query and print the response
+response = query_engine.query('Give me my calendar.')
+print(response)
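tester.py assumes an index variable already exists in scope. A minimal sketch of the preamble it relies on, rebuilding the index from data.txt as data_loader.py does (the relative path is an assumption):

# Hypothetical preamble for tester.py, following data_loader.py.
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

documents = SimpleDirectoryReader(input_files=['data.txt']).load_data()  # path is an assumption
index = VectorStoreIndex.from_documents(documents)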