prototipo-1-rag / edgedb /populate_edgedb.py
Pecximenes's picture
Adding v1 of rag-agent
853a071
raw
history blame
2.79 kB
import edgedb
import json
import os
from dotenv import load_dotenv
# Populate EdgeDB from the scraped pages stored as JSON files under
# DOWNLOAD_ROOT.
#
# For every ``*.json`` file found, one Website object (with a nested Text
# object) is inserted, and each entry of the file's optional "images" and
# "videos" lists is then inserted and linked to that Website.
#
# NOTE(review): every image/video is attached with its own query and the
# Website is looked up again by URL, so a failure part-way through a file
# leaves that Website partially populated, and re-running the script inserts
# everything again. Consider a transaction / uniqueness constraint if that
# matters.

# Load connection settings from a local .env file before the client is built.
load_dotenv()

# Directory tree that holds the downloaded JSON documents.
DOWNLOAD_ROOT = "../Banco_de_Dados/Coleta/downloaded_files"


def _relative_data_path(root, base=DOWNLOAD_ROOT):
    """Return the ``./``-prefixed path of *root* relative to *base*.

    The first directory level below *base* is dropped, which reproduces the
    original hard-coded ``root.split('/')[5:]`` slice (presumably that level
    is a per-site folder -- TODO confirm). Using ``os.path.relpath`` keeps the
    result correct regardless of the platform's path separator or the depth
    of *base*.
    """
    rel_parts = os.path.relpath(root, base).split(os.sep)
    return './' + '/'.join(rel_parts[1:])


client = edgedb.create_client()

try:
    for root, dirs, files in os.walk(DOWNLOAD_ROOT):
        # Same for every file in this directory, so compute it once.
        data_path = _relative_data_path(root)

        for file in files:
            if not file.endswith(".json"):
                continue

            # JSON is UTF-8 by specification; do not depend on the locale's
            # default encoding.
            with open(os.path.join(root, file), 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Insert Website
            client.query(
                '''
                INSERT Website {
                    url := <str>$url,
                    relative_path := <str>$relative_path,
                    hyperrefs := <array<str>>$gov_links,
                    images := {},
                    videos := {},
                    text := (
                        INSERT Text {
                            content := <str>$content
                        }
                    )
                };
                ''',
                url=data['absolute_url'],
                relative_path=data_path,
                gov_links=data['gov_links'],
                content=data['text'],
            )

            # Insert Images
            for image in data.get('images', []):
                client.query(
                    '''
                    UPDATE Website
                    FILTER .url = <str>$url
                    SET {
                        images += {
                            (INSERT Image {
                                name := <str>$name,
                                path := <str>$path,
                                url := <str>$image_url,
                                hyperlink := <str>$hyperlink,
                                alt := <str>$alt,
                            })
                        }
                    };
                    ''',
                    url=data['absolute_url'],
                    path=image['path'],
                    name=image['name'],
                    image_url=image['url'],
                    hyperlink=image['hyperlink'],
                    alt=image['alt'],
                )

            # Insert Videos
            for video in data.get('videos', []):
                client.query(
                    '''
                    UPDATE Website
                    FILTER .url = <str>$url
                    SET {
                        videos += {
                            (INSERT Video {
                                name := <str>$name,
                                url := <str>$video_url,
                                hyperlink := <str>$hyperlink,
                                alt := <str>$alt,
                            })
                        }
                    };
                    ''',
                    url=data['absolute_url'],
                    name=video['name'],
                    video_url=video['url'],
                    hyperlink=video['hyperlink'],
                    alt=video['alt'],
                )
finally:
    # Release the connection pool even when a file or query fails.
    client.close()