alonsosilva committed on
Commit
72e2472
1 Parent(s): 77bafdf
Files changed (3) hide show
  1. Dockerfile +23 -0
  2. app.py +121 -0
  3. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11
2
+
3
+ # Set up a new user named "user" with user ID 1000 for permission
4
+ RUN useradd -m -u 1000 user
5
+ # Switch to the "user" user
6
+ USER user
7
+ # Set home to the user's home directory
8
+ ENV HOME=/home/user \
9
+ PATH=/home/user/.local/bin:$PATH
10
+
11
+ # Upgrade pip
12
+ RUN pip install --no-cache-dir --upgrade pip
13
+
14
+ COPY --chown=user requirements.txt requirements.txt
15
+
16
+ # Install requirements
17
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
18
+
19
+ COPY --chown=user app.py app.py
20
+
21
+ COPY --chown=user data/ data/
22
+
23
+ ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #from dotenv import find_dotenv, load_dotenv
2
+ #_ = load_dotenv(find_dotenv())
3
+
4
+ import solara
5
+
6
+ # Remove any ./data directory left over from a previous run before the LanceDB table is created
7
+ import shutil
8
+
9
+ shutil.rmtree("./data", ignore_errors=True)
10
+
11
+ import polars as pl
12
+
13
# Load the song dataset (a CSV served from Google Drive) into a polars DataFrame.
df = pl.read_csv(
    "https://drive.google.com/uc?export=download&id=1uD3h7xYxr9EoZ0Ggoh99JtQXa3AxtxyU"
)

import string

# Normalise the capitalisation of album and song titles with string.capwords.
album_titles = [string.capwords(title) for title in df["Album"]]
df = df.with_columns(pl.Series("Album", album_titles))
song_titles = [string.capwords(title) for title in df["Song"]]
df = df.with_columns(pl.Series("Song", song_titles))

# Rows without lyrics get the literal string "None" so the column has no nulls.
lyrics_filled = pl.col("Lyrics").fill_null("None")
df = df.with_columns(lyrics_filled)

# Build the "text" column: a Markdown heading "# <Album>: <Song>", a blank
# line, then the lyrics.
heading = pl.lit("# ") + pl.col("Album") + pl.lit(": ") + pl.col("Song")
df = df.with_columns(text=heading + pl.lit("\n\n") + pl.col("Lyrics"))
34
+
35
+ import lancedb
36
+
37
# Connect to the LanceDB database stored in the local data/ directory.
db = lancedb.connect("data/")

from lancedb.embeddings import get_registry

# Fetch the "sentence-transformers" entry from LanceDB's embedding registry
# and instantiate it with the TaylorAI/gte-tiny model on the CPU.
_st_embedding_factory = get_registry().get("sentence-transformers")
embeddings = _st_embedding_factory.create(name="TaylorAI/gte-tiny", device="cpu")
46
+
47
+ from lancedb.pydantic import LanceModel, Vector
48
+
49
+
50
class Songs(LanceModel):
    """LanceDB table schema for one song, including its embedding vector."""

    # Metadata columns taken from the dataset.
    Song: str
    Lyrics: str
    Album: str
    Artist: str
    # Declared as the source field of the embedding function.
    text: str = embeddings.SourceField()
    # Declared as the vector field; its width is the embedding function's
    # reported number of dimensions.
    vector: Vector(embeddings.ndims()) = embeddings.VectorField()


# Create the "Songs" table from the schema, then insert the prepared rows.
table = db.create_table("Songs", schema=Songs)
table.add(data=df)
60
+
61
+ import os
62
+ from typing import Optional
63
+
64
+ from langchain_community.chat_models import ChatOpenAI
65
+
66
class ChatOpenRouter(ChatOpenAI):
    """ChatOpenAI subclass whose API base defaults to the OpenRouter endpoint.

    Args:
        model_name: Model identifier passed through to ``ChatOpenAI``.
        openai_api_key: API key to use. When omitted (``None``), the value of
            the ``OPENROUTER_API_KEY`` environment variable is used instead.
        openai_api_base: Base URL of the API; defaults to OpenRouter's v1 URL.
        **kwargs: Forwarded unchanged to ``ChatOpenAI.__init__``.
    """

    openai_api_base: str
    openai_api_key: str
    model_name: str

    def __init__(
        self,
        model_name: str,
        openai_api_key: Optional[str] = None,
        openai_api_base: str = "https://openrouter.ai/api/v1",
        **kwargs,
    ):
        # Only fall back to the environment when the caller did not supply a
        # key; previously an explicit key was silently discarded.
        if openai_api_key is None:
            openai_api_key = os.getenv("OPENROUTER_API_KEY")
        super().__init__(
            openai_api_base=openai_api_base,
            openai_api_key=openai_api_key,
            model_name=model_name,
            **kwargs,
        )
85
+
86
+ llm_openrouter = ChatOpenRouter(model_name="meta-llama/llama-3.1-405b-instruct")
87
+
88
def get_relevant_texts(query, table, limit=5):
    """Return the texts of the best search hits for *query*, joined as context.

    Args:
        query: Search string passed to ``table.search``.
        table: Object exposing ``search(query).limit(n).to_polars()`` whose
            result has a ``"text"`` column.
        limit: Maximum number of hits to request (default 5).

    Returns:
        Each hit's text followed by a ``---`` separator, with the pieces
        joined by a single space. An empty string when there are no hits.
    """
    results = table.search(query).limit(limit).to_polars()
    # Iterate over the rows actually returned instead of indexing a fixed
    # range(5), which raised when the search produced fewer than five hits.
    return " ".join(text + "\n\n---\n\n" for text in results["text"])
95
+
96
def generate_prompt(query, table):
    """Build the LLM prompt: an instruction, the retrieved context, the question.

    The context is whatever ``get_relevant_texts(query, table)`` returns.
    """
    context = get_relevant_texts(query, table)
    pieces = (
        "Answer the question based only on the following context:\n\n",
        context,
        "\n\nQuestion: ",
        query,
    )
    return "".join(pieces)
103
+
104
def generate_response(query, table):
    """Send the prompt built for *query* to the OpenRouter LLM and return its reply.

    Returns the ``content`` attribute of the object returned by
    ``llm_openrouter.invoke``.
    """
    reply = llm_openrouter.invoke(input=generate_prompt(query, table))
    return reply.content
108
+
109
# Reactive holder for the user's question, pre-filled with an example query.
query = solara.reactive("Which song is about a boy who is having nightmares?")


@solara.component
def Page():
    """Render the search box and, for a non-empty query, the answer and its context."""
    context_columns = ["Song", "Album", "_distance", "Lyrics", "Artist"]
    with solara.Column(margin=10):
        solara.Markdown("# Metallica Song Finder Bot")
        solara.InputText("Enter some query:", query, continuous_update=False)
        question = query.value
        if question != "":
            # The five nearest rows, reduced to the columns shown in the table.
            hits = table.search(question).limit(5).to_polars()
            hits = hits.select(context_columns)
            solara.Markdown("## Answer:")
            solara.Markdown(generate_response(question, table))
            solara.Markdown("## Context:")
            solara.DataFrame(hits, items_per_page=5)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ solara==1.39.0
2
+ polars==1.7.1
3
+ lancedb==0.13.0
4
+ sentence-transformers==3.1.1
5
+ langchain-community==0.3.0
6
+ openai==1.47.1