Christopher Secccafico committed on
Commit 9e80d00
1 Parent(s): 4a87a8c

Upload 9 files

Files changed (9)
  1. MVP.py +0 -0
  2. data.txt +7 -0
  3. data_loader.py +7 -0
  4. embed initializer.py +10 -0
  5. full run.py +57 -0
  6. initializer2.py +7 -0
  7. run_ollama.sh +9 -0
  8. setup.py +14 -0
  9. tester.py +6 -0
MVP.py ADDED
File without changes
data.txt ADDED
@@ -0,0 +1,7 @@
+ # data.txt
+
+ 19 Dec - Go to the movies
+ 20 Dec - Have dinner with family
+ 21 Dec - Go to the birthday party
+ 22 Dec - Go to the dentist
+ 23 Dec - Finish writing a draft for a blog post
data_loader.py ADDED
@@ -0,0 +1,7 @@
+ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
+
+ # Load data from the specified file
+ documents = SimpleDirectoryReader(input_files=['/path/to/data.txt']).load_data()
+
+ # Process the documents (chunking, embedding, indexing) and store them in a vector store index
+ index = VectorStoreIndex.from_documents(documents)
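For reference, a minimal sketch (assuming data.txt sits next to the script) of what load_data() returns here: a list of Document objects, one per input file.

    from llama_index.core import SimpleDirectoryReader

    documents = SimpleDirectoryReader(input_files=['data.txt']).load_data()
    print(len(documents))          # one Document per input file
    print(documents[0].text[:80])  # first 80 characters of the loaded text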
embed initializer.py ADDED
@@ -0,0 +1,10 @@
+ from llama_index.core.indices.prompt_helper import PromptHelper
+ from llama_index.core.node_parser import SentenceSplitter
+ from llama_index.core import Settings
+ # Initialize PromptHelper and SentenceSplitter
+ prompt_helper = PromptHelper(context_window=2048)
+ node_parser = SentenceSplitter(chunk_size=300, chunk_overlap=20)
+
+ # Update Settings with helpers
+ Settings.prompt_helper = prompt_helper
+ Settings.node_parser = node_parser
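A quick sketch of what this splitter configuration does to raw text before embedding, using a made-up passage; SentenceSplitter's split_text breaks it into overlapping chunks of at most chunk_size tokens.

    from llama_index.core.node_parser import SentenceSplitter

    splitter = SentenceSplitter(chunk_size=300, chunk_overlap=20)
    text = 'This is a sentence about calendars. ' * 100  # hypothetical long input
    chunks = splitter.split_text(text)
    print(len(chunks))     # number of chunks produced
    print(chunks[0][:60])  # start of the first chunk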
full run.py ADDED
@@ -0,0 +1,57 @@
+ import os
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.llms import Ollama
+ from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
+ from llama_index.core.indices.prompt_helper import PromptHelper
+ from llama_index.core.node_parser import SentenceSplitter
+
+
+ def setup_environment():
+     # Set environment variables
+     os.environ['llm'] = 'tinyllama'
+
+
+ def initialize_settings():
+     # Initialize and configure the LLM and embedding model
+     llm_model = os.getenv('llm')
+     llm = Ollama(model=llm_model)
+     embed_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
+
+     # Initialize prompt helper and sentence splitter
+     prompt_helper = PromptHelper(context_window=2048)
+     node_parser = SentenceSplitter(chunk_size=300, chunk_overlap=20)
+
+     # Configure global settings for the application
+     Settings.llm = llm
+     Settings.embed_model = embed_model
+     Settings.prompt_helper = prompt_helper
+     Settings.node_parser = node_parser
+
+
+ def load_and_index_data(file_path):
+     # Load data from the specified file path
+     documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
+
+     # Process the documents (chunking, embedding, indexing) and store them in a vector store index
+     return VectorStoreIndex.from_documents(documents)
+
+
+ def query_data(query_engine, query):
+     # Query the indexed data and return the response
+     return query_engine.query(query)
+
+
+ if __name__ == "__main__":
+     setup_environment()
+     initialize_settings()
+
+     # Assuming the file is stored at the specified path
+     file_path = 'data.txt'
+     index = load_and_index_data(file_path)
+
+     # Build a query engine from the index
+     query_engine = index.as_query_engine()
+
+     # Example query
+     response = query_data(query_engine, 'Show me my calendar dates.')
+     print(response)
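This script assumes an Ollama server is already running locally with the tinyllama model pulled (see run_ollama.sh). A minimal sanity check along those lines, before running the full pipeline:

    from langchain_community.llms import Ollama

    llm = Ollama(model='tinyllama')
    print(llm.invoke('Reply with one word: hello'))  # fails fast if the server or model is missing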
initializer2.py ADDED
@@ -0,0 +1,7 @@
+ from llama_index.core import Settings
+
+ # Configure global settings for our app so that VectorStoreIndex and the QueryEngine use these components by default (llm, embed_model, prompt_helper and node_parser are assumed to be defined as in setup.py and embed initializer.py).
+ Settings.llm = llm
+ Settings.embed_model = embed_model
+ Settings.prompt_helper = prompt_helper
+ Settings.node_parser = node_parser
run_ollama.sh ADDED
@@ -0,0 +1,9 @@
+ #!/bin/bash
+
+ # This script first installs ollama and then runs the 'mistral' model.
+
+ # Download and execute the installation script
+ curl -fsSL https://ollama.com/install.sh | sh
+
+ # Run ollama with the 'mistral' model
+ ollama run mistral
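Note that this script runs mistral, while full run.py and setup.py configure tinyllama; to serve the model the Python code expects, run 'ollama pull tinyllama' (or 'ollama run tinyllama') first.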
setup.py ADDED
@@ -0,0 +1,14 @@
+ from langchain_community.llms import Ollama
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from llama_index.core import Settings
+
+ # Create an Ollama instance with the model configuration
+ llm = Ollama(model='tinyllama')
+
+ # Use the llm instance directly where needed
+ Settings.llm = llm
+
+ # Configure the embedding model for your settings
+ Settings.embed_model = HuggingFaceEmbeddings(
+     model_name="BAAI/bge-small-en-v1.5"
+ )
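A design note: these are LangChain objects assigned to llama_index's Settings. llama_index can typically resolve them through its LangChain bridge, but depending on installed versions this may require the llama-index-llms-langchain and llama-index-embeddings-langchain packages; the native llama-index Ollama and HuggingFace embedding integrations are a common alternative.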
tester.py ADDED
@@ -0,0 +1,6 @@
+ # Build a query engine from the index (index is assumed to be the VectorStoreIndex built in data_loader.py)
+ query_engine = index.as_query_engine()
+
+ # Execute a query and print the response
+ response = query_engine.query('Give me my calendar.')
+ print(response)