Spaces:
Runtime error
Runtime error
chains
Browse files- chains/code_generator/base.py +19 -0
- chains/code_generator/templates.py +55 -0
- chains/output_format/base.py +19 -0
- chains/output_format/templates.py +28 -0
chains/code_generator/base.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.chains import LLMChain
|
2 |
+
from langchain.memory import ConversationBufferMemory
|
3 |
+
from chains.code_generator.templates import chat_script_prompt
|
4 |
+
|
5 |
+
|
6 |
+
def chain_code_generator(llm) -> LLMChain:
    """Build the LLMChain that produces the scraping script.

    The chain renders ``chat_script_prompt`` with the supplied ``llm`` and
    exposes its result under the ``"script"`` output key. A
    ``ConversationBufferMemory`` is attached that records the
    ``"output_format"`` input and publishes its buffer as ``"chat_history"``.

    Args:
        llm: The language model object handed to ``LLMChain``.

    Returns:
        The configured ``LLMChain`` (created with ``verbose=True``).
    """
    # Conversation buffer keyed on the "output_format" input.
    history = ConversationBufferMemory(
        memory_key="chat_history",
        input_key="output_format",
    )

    # Assemble the chain around the chat prompt and the memory above.
    chain = LLMChain(
        memory=history,
        output_key="script",
        prompt=chat_script_prompt,
        llm=llm,
        verbose=True,
    )
    return chain
|
chains/code_generator/templates.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.prompts import (
|
2 |
+
SystemMessagePromptTemplate,
|
3 |
+
HumanMessagePromptTemplate,
|
4 |
+
ChatPromptTemplate,
|
5 |
+
PromptTemplate,
|
6 |
+
)
|
7 |
+
|
8 |
+
# Prompt templates
|
9 |
+
# Raw text of the system message. It has two placeholders:
# {html_content} (the sample HTML) and {output_format} (the value the
# generated function is shown returning in the example).
_SCRIPT_SYSTEM_TEXT = """You are a helpful assitant that helps people create python scripts for web scraping.
--------------------------------
The example of the html content is: {html_content}
--------------------------------
You have to create a python function that extract information from an html code using web scrapping.
Try to select the most low-level class that is common among the elements to make de find_all function.

Your answer SHOULD only contain the python function code without any aditional word or character.

Import the used libraries above the function definition.

The function name must be extract_info.

The function have to receive the html data as a parameter.

Your function needs to extract information for all the elements with similar attributes.

Before calling .text or ['href'] methods, check if the element exists.

----------------
FINAL ANSWER EXAMPLE:
from bs4 import BeautifulSoup

def extract_info(html):
...CODE...
return {output_format}
----------------

Always check if the element exists before calling some method.

"""

# Plain prompt templates: one for the system message, one for the fixed
# human request (which takes no variables).
system_template_script = PromptTemplate(
    template=_SCRIPT_SYSTEM_TEXT,
    input_variables=["output_format", "html_content"],
)

human_template_script = PromptTemplate(
    template="give me the code",
    input_variables=[],
)

# Chat message templates built from the raw template strings above.
system_template_script_prompt = SystemMessagePromptTemplate.from_template(
    system_template_script.template
)
human_template_script_prompt = HumanMessagePromptTemplate.from_template(
    human_template_script.template
)

# Final chat prompt: system message first, then the human message.
_script_messages = [system_template_script_prompt, human_template_script_prompt]
chat_script_prompt = ChatPromptTemplate.from_messages(_script_messages)
|
chains/output_format/base.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.chains import LLMChain
|
2 |
+
from langchain.memory import ConversationBufferMemory
|
3 |
+
from chains.output_format.templates import output_format_chat_prompt
|
4 |
+
|
5 |
+
|
6 |
+
def chain_output_format(llm) -> LLMChain:
    """Build the LLMChain that proposes the output format for some HTML.

    The chain renders ``output_format_chat_prompt`` with the supplied ``llm``
    and exposes its result under the ``"output_format"`` output key. A
    ``ConversationBufferMemory`` is attached that records the
    ``"html_content"`` input and publishes its buffer as ``"chat_history"``.

    Args:
        llm: The language model object handed to ``LLMChain``.

    Returns:
        The configured ``LLMChain`` (created with ``verbose=True``).
    """
    # Conversation buffer keyed on the "html_content" input.
    history = ConversationBufferMemory(
        memory_key="chat_history",
        input_key="html_content",
    )

    # Assemble the chain around the chat prompt and the memory above.
    chain = LLMChain(
        memory=history,
        output_key="output_format",
        prompt=output_format_chat_prompt,
        llm=llm,
        verbose=True,
    )
    return chain
|
chains/output_format/templates.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, PromptTemplate
|
2 |
+
|
3 |
+
# prompt templates
|
4 |
+
# Raw text of the system message; {html_content} is its only placeholder.
_OUTPUT_FORMAT_SYSTEM_TEXT = """You are a helpful assitant that helps people extract JSON information from HTML content.

The input is a HTML content.

The expected output is a JSON with a relevant information in the following html: {html_content}

Try to extract as much information as possible. Including images, links, etc.

The assitant answer should ONLY contain the JSON information without any aditional word or character.

The expected output format is an array of objects.

"""

# Plain prompt templates for the system and human messages. Both take the
# same single variable, "html_content".
system_template_output_format = PromptTemplate(
    template=_OUTPUT_FORMAT_SYSTEM_TEXT,
    input_variables=["html_content"],
)

human_template_output_format = PromptTemplate(
    template="this is the html content: {html_content}",
    input_variables=["html_content"],
)

# Chat message templates built from the raw template strings above.
system_message_prompt = SystemMessagePromptTemplate.from_template(
    system_template_output_format.template
)
human_message_prompt = HumanMessagePromptTemplate.from_template(
    human_template_output_format.template
)

# Final chat prompt: system message first, then the human message.
_output_format_messages = [system_message_prompt, human_message_prompt]
output_format_chat_prompt = ChatPromptTemplate.from_messages(_output_format_messages)
|