GianJSX committed on
Commit
15ad492
1 Parent(s): 371198a
chains/code_generator/base.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains import LLMChain
2
+ from langchain.memory import ConversationBufferMemory
3
+ from chains.code_generator.templates import chat_script_prompt
4
+
5
+
6
def chain_code_generator(llm, *, verbose: bool = True) -> LLMChain:
    """Build the chain that asks the model for a web-scraping script.

    The chain renders ``chat_script_prompt`` and publishes the model's
    reply under the ``"script"`` output key.

    Args:
        llm: Language model instance, passed to ``LLMChain`` unchanged.
        verbose: Forwarded to ``LLMChain``. Defaults to ``True``, the value
            that was previously hard-coded, so existing callers keep the
            same behaviour.

    Returns:
        A new ``LLMChain`` that owns its own ``ConversationBufferMemory``.
    """
    # A fresh memory object is created on every call, so chains built by
    # separate calls never share history.
    # NOTE(review): chat_script_prompt declares no {chat_history}
    # placeholder, so the stored history does not appear to reach the
    # model -- confirm whether this memory is still needed.
    script_memory = ConversationBufferMemory(
        input_key="output_format", memory_key="chat_history"
    )

    return LLMChain(
        llm=llm,
        prompt=chat_script_prompt,
        verbose=verbose,
        output_key="script",
        memory=script_memory,
    )
chains/code_generator/templates.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts import (
2
+ SystemMessagePromptTemplate,
3
+ HumanMessagePromptTemplate,
4
+ ChatPromptTemplate,
5
+ PromptTemplate,
6
+ )
7
+
8
+ # Prompt templates
9
# Raw text of the system message. The {html_content} and {output_format}
# placeholders are substituted when the prompt is formatted.
# NOTE(review): any indentation inside the "FINAL ANSWER EXAMPLE" section
# could not be recovered from the rendered diff -- verify it against the
# repository before relying on the exact whitespace.
_SCRIPT_SYSTEM_TEXT = """You are a helpful assitant that helps people create python scripts for web scraping.
--------------------------------
The example of the html content is: {html_content}
--------------------------------
You have to create a python function that extract information from an html code using web scrapping.
Try to select the most low-level class that is common among the elements to make de find_all function.

Your answer SHOULD only contain the python function code without any aditional word or character.

Import the used libraries above the function definition.

The function name must be extract_info.

The function have to receive the html data as a parameter.

Your function needs to extract information for all the elements with similar attributes.

Before calling .text or ['href'] methods, check if the element exists.

----------------
FINAL ANSWER EXAMPLE:
from bs4 import BeautifulSoup

def extract_info(html):
...CODE...
return {output_format}
----------------

Always check if the element exists before calling some method.

"""

# Fixed human turn; it takes no variables.
_SCRIPT_HUMAN_TEXT = "give me the code"

# Plain prompt templates wrapping the raw text above.
system_template_script = PromptTemplate(
    template=_SCRIPT_SYSTEM_TEXT,
    input_variables=["output_format", "html_content"],
)
human_template_script = PromptTemplate(
    template=_SCRIPT_HUMAN_TEXT,
    input_variables=[],
)

# Chat Prompt objects: one message template per role, then the combined
# system + human prompt that the code-generator chain consumes.
system_template_script_prompt = SystemMessagePromptTemplate.from_template(
    system_template_script.template
)
human_template_script_prompt = HumanMessagePromptTemplate.from_template(
    human_template_script.template
)
chat_script_prompt = ChatPromptTemplate.from_messages(
    [
        system_template_script_prompt,
        human_template_script_prompt,
    ]
)
chains/output_format/base.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains import LLMChain
2
+ from langchain.memory import ConversationBufferMemory
3
+ from chains.output_format.templates import output_format_chat_prompt
4
+
5
+
6
def chain_output_format(llm, *, verbose: bool = True) -> LLMChain:
    """Build the chain that asks the model for a JSON output format.

    The chain renders ``output_format_chat_prompt`` and publishes the
    model's reply under the ``"output_format"`` output key.

    Args:
        llm: Language model instance, passed to ``LLMChain`` unchanged.
        verbose: Forwarded to ``LLMChain``. Defaults to ``True``, the value
            that was previously hard-coded, so existing callers keep the
            same behaviour.

    Returns:
        A new ``LLMChain`` that owns its own ``ConversationBufferMemory``.
    """
    # A fresh memory object is created on every call, so chains built by
    # separate calls never share history.
    # NOTE(review): output_format_chat_prompt declares no {chat_history}
    # placeholder, so the stored history does not appear to reach the
    # model -- confirm whether this memory is still needed.
    html_memory = ConversationBufferMemory(
        input_key="html_content", memory_key="chat_history"
    )

    return LLMChain(
        llm=llm,
        prompt=output_format_chat_prompt,
        verbose=verbose,
        output_key="output_format",
        memory=html_memory,
    )
chains/output_format/templates.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, PromptTemplate
2
+
3
+ # prompt templates
4
# Raw text of the system message. The {html_content} placeholder is
# substituted when the prompt is formatted.
_OUTPUT_FORMAT_SYSTEM_TEXT = """You are a helpful assitant that helps people extract JSON information from HTML content.

The input is a HTML content.

The expected output is a JSON with a relevant information in the following html: {html_content}

Try to extract as much information as possible. Including images, links, etc.

The assitant answer should ONLY contain the JSON information without any aditional word or character.

The expected output format is an array of objects.

"""

# Human turn; it repeats the same {html_content} variable.
_OUTPUT_FORMAT_HUMAN_TEXT = "this is the html content: {html_content}"

# Plain prompt templates wrapping the raw text above.
system_template_output_format = PromptTemplate(
    template=_OUTPUT_FORMAT_SYSTEM_TEXT,
    input_variables=["html_content"],
)
human_template_output_format = PromptTemplate(
    template=_OUTPUT_FORMAT_HUMAN_TEXT,
    input_variables=["html_content"],
)

# Chat prompt objects: one message template per role, then the combined
# system + human prompt that the output-format chain consumes.
system_message_prompt = SystemMessagePromptTemplate.from_template(
    system_template_output_format.template
)
human_message_prompt = HumanMessagePromptTemplate.from_template(
    human_template_output_format.template
)
output_format_chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        human_message_prompt,
    ]
)