Skip to content

Commit 05afecd

Browse files
Remove jq as a dependency and adopt pip-tools to manage all dependencies (elastic#122)
1 parent d3e7a7d commit 05afecd

File tree

3 files changed: 232 additions and 33 deletions.

example-apps/chatbot-rag-app/data/index_data.py

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from elasticsearch import Elasticsearch, NotFoundError
22
from langchain.vectorstores import ElasticsearchStore
3-
from langchain.document_loaders import JSONLoader
3+
from langchain.docstore.document import Document
44
from langchain.text_splitter import RecursiveCharacterTextSplitter
55
from dotenv import load_dotenv
6+
import json
67
import os
78
import time
89

@@ -55,29 +56,19 @@ def install_elser():
5556
)
5657

5758

58-
# Metadata extraction function
59-
def metadata_func(record: dict, metadata: dict) -> dict:
60-
metadata["name"] = record.get("name")
61-
metadata["summary"] = record.get("summary")
62-
metadata["url"] = record.get("url")
63-
metadata["category"] = record.get("category")
64-
metadata["updated_at"] = record.get("updated_at")
65-
66-
return metadata
67-
68-
6959
def main():
7060
install_elser()
7161

7262
print(f"Loading data from ${FILE}")
7363

74-
loader = JSONLoader(
75-
file_path=FILE,
76-
jq_schema=".[]",
77-
content_key="content",
78-
metadata_func=metadata_func,
79-
)
80-
workplace_docs = loader.load()
64+
metadata_keys = ['name', 'summary', 'url', 'category', 'updated_at']
65+
workplace_docs = []
66+
with open(FILE, 'rt') as f:
67+
for doc in json.loads(f.read()):
68+
workplace_docs.append(Document(
69+
page_content=doc['content'],
70+
metadata={k: doc.get(k) for k in metadata_keys}
71+
))
8172

8273
print(f"Loaded {len(workplace_docs)} documents")
8374

requirements.in

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# package management
2+
pip-tools
3+
4+
# core dependencies
5+
elasticsearch
6+
langchain
7+
tiktoken
8+
flask
9+
flask-cors
10+
python-dotenv
11+
12+
# OpenAI dependencies
13+
openai
14+
15+
# Vertex AI dependencies
16+
google-cloud-aiplatform
17+
grpcio-status
18+
19+
# Bedrock dependencies
20+
boto3
21+
22+
# TODO: confirm whether these are still needed
23+
exceptiongroup
24+
importlib-metadata
25+
numexpr
requirements.txt

Lines changed: 197 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,259 @@
1+
#
2+
# This file is autogenerated by pip-compile with Python 3.11
3+
# by the following command:
4+
#
5+
# pip-compile requirements.in
6+
#
17
aiohttp==3.8.5
8+
# via
9+
# langchain
10+
# openai
211
aiosignal==1.3.1
12+
# via aiohttp
313
annotated-types==0.5.0
14+
# via pydantic
415
anyio==3.7.1
16+
# via langchain
517
async-timeout==4.0.3
18+
# via aiohttp
619
attrs==23.1.0
20+
# via aiohttp
721
blinker==1.6.2
22+
# via flask
823
boto3==1.28.61
24+
# via -r requirements.in
925
botocore==1.31.61
26+
# via
27+
# boto3
28+
# s3transfer
29+
build==1.0.3
30+
# via
31+
# -r requirements.in
32+
# pip-tools
1033
cachetools==5.3.1
34+
# via google-auth
1135
certifi==2023.7.22
36+
# via
37+
# elastic-transport
38+
# requests
1239
charset-normalizer==3.2.0
40+
# via
41+
# aiohttp
42+
# requests
1343
click==8.1.7
44+
# via
45+
# flask
46+
# pip-tools
1447
dataclasses-json==0.5.14
48+
# via langchain
1549
elastic-transport==8.4.0
50+
# via elasticsearch
1651
elasticsearch==8.9.0
17-
exceptiongroup==1.1.3
18-
Flask==2.3.3
19-
Flask-Cors==4.0.0
52+
# via -r requirements.in
53+
exceptiongroup==1.2.0
54+
# via -r requirements.in
55+
flask==2.3.3
56+
# via
57+
# -r requirements.in
58+
# flask-cors
59+
flask-cors==4.0.0
60+
# via -r requirements.in
2061
frozenlist==1.4.0
21-
google-api-core==2.14.0
62+
# via
63+
# aiohttp
64+
# aiosignal
65+
google-api-core[grpc]==2.14.0
66+
# via
67+
# google-cloud-aiplatform
68+
# google-cloud-bigquery
69+
# google-cloud-core
70+
# google-cloud-resource-manager
71+
# google-cloud-storage
2272
google-auth==2.23.2
73+
# via
74+
# google-api-core
75+
# google-cloud-core
76+
# google-cloud-storage
2377
google-cloud-aiplatform==1.35.0
78+
# via -r requirements.in
2479
google-cloud-bigquery==3.13.0
80+
# via google-cloud-aiplatform
2581
google-cloud-core==2.3.3
82+
# via
83+
# google-cloud-bigquery
84+
# google-cloud-storage
2685
google-cloud-resource-manager==1.10.4
86+
# via google-cloud-aiplatform
2787
google-cloud-storage==2.11.0
88+
# via google-cloud-aiplatform
2889
google-crc32c==1.5.0
90+
# via google-resumable-media
2991
google-resumable-media==2.6.0
30-
googleapis-common-protos==1.61.0
92+
# via
93+
# google-cloud-bigquery
94+
# google-cloud-storage
95+
googleapis-common-protos[grpc]==1.61.0
96+
# via
97+
# google-api-core
98+
# grpc-google-iam-v1
99+
# grpcio-status
31100
grpc-google-iam-v1==0.12.7
101+
# via google-cloud-resource-manager
32102
grpcio==1.59.3
103+
# via
104+
# google-api-core
105+
# google-cloud-bigquery
106+
# googleapis-common-protos
107+
# grpc-google-iam-v1
108+
# grpcio-status
33109
grpcio-status==1.59.3
110+
# via
111+
# -r requirements.in
112+
# google-api-core
34113
idna==3.4
114+
# via
115+
# anyio
116+
# requests
117+
# yarl
35118
importlib-metadata==6.8.0
119+
# via -r requirements.in
36120
itsdangerous==2.1.2
37-
Jinja2==3.1.2
121+
# via flask
122+
jinja2==3.1.2
123+
# via flask
38124
jmespath==1.0.1
39-
jq==1.4.1
125+
# via
126+
# boto3
127+
# botocore
40128
jsonpatch==1.33
129+
# via langchain
41130
jsonpointer==2.4
131+
# via jsonpatch
42132
langchain==0.0.333
133+
# via -r requirements.in
43134
langsmith==0.0.65
44-
MarkupSafe==2.1.3
135+
# via langchain
136+
markupsafe==2.1.3
137+
# via
138+
# jinja2
139+
# werkzeug
45140
marshmallow==3.20.1
141+
# via dataclasses-json
46142
multidict==6.0.4
143+
# via
144+
# aiohttp
145+
# yarl
47146
mypy-extensions==1.0.0
147+
# via typing-inspect
48148
numexpr==2.8.5
149+
# via -r requirements.in
49150
numpy==1.25.2
151+
# via
152+
# langchain
153+
# numexpr
154+
# shapely
50155
openai==0.27.9
156+
# via -r requirements.in
51157
packaging==23.1
158+
# via
159+
# build
160+
# google-cloud-aiplatform
161+
# google-cloud-bigquery
162+
# marshmallow
163+
pip-tools==7.3.0
164+
# via -r requirements.in
52165
proto-plus==1.22.3
166+
# via
167+
# google-cloud-aiplatform
168+
# google-cloud-bigquery
169+
# google-cloud-resource-manager
53170
protobuf==4.25.1
171+
# via
172+
# google-api-core
173+
# google-cloud-aiplatform
174+
# google-cloud-bigquery
175+
# google-cloud-resource-manager
176+
# googleapis-common-protos
177+
# grpc-google-iam-v1
178+
# grpcio-status
179+
# proto-plus
54180
pyasn1==0.5.0
181+
# via
182+
# pyasn1-modules
183+
# rsa
55184
pyasn1-modules==0.3.0
185+
# via google-auth
56186
pydantic==2.3.0
57-
pydantic_core==2.6.3
187+
# via
188+
# langchain
189+
# langsmith
190+
pydantic-core==2.6.3
191+
# via pydantic
192+
pyproject-hooks==1.0.0
193+
# via
194+
# -r requirements.in
195+
# build
58196
python-dateutil==2.8.2
197+
# via
198+
# botocore
199+
# google-cloud-bigquery
59200
python-dotenv==1.0.0
60-
PyYAML==6.0.1
201+
# via -r requirements.in
202+
pyyaml==6.0.1
203+
# via langchain
204+
regex==2023.10.3
205+
# via tiktoken
61206
requests==2.31.0
207+
# via
208+
# google-api-core
209+
# google-cloud-bigquery
210+
# google-cloud-storage
211+
# langchain
212+
# langsmith
213+
# openai
214+
# tiktoken
62215
rsa==4.9
216+
# via google-auth
63217
s3transfer==0.7.0
218+
# via boto3
64219
shapely==2.0.2
220+
# via google-cloud-aiplatform
65221
six==1.16.0
222+
# via python-dateutil
66223
sniffio==1.3.0
67-
SQLAlchemy==2.0.20
224+
# via anyio
225+
sqlalchemy==2.0.20
226+
# via langchain
68227
tenacity==8.2.3
228+
# via langchain
229+
tiktoken==0.5.1
230+
# via -r requirements.in
69231
tqdm==4.66.1
232+
# via openai
233+
typing-extensions==4.7.1
234+
# via
235+
# pydantic
236+
# pydantic-core
237+
# sqlalchemy
238+
# typing-inspect
70239
typing-inspect==0.9.0
71-
typing_extensions==4.7.1
240+
# via dataclasses-json
72241
urllib3==1.26.16
73-
Werkzeug==2.3.7
242+
# via
243+
# botocore
244+
# elastic-transport
245+
# requests
246+
werkzeug==2.3.7
247+
# via flask
248+
wheel==0.41.3
249+
# via pip-tools
74250
yarl==1.9.2
251+
# via aiohttp
75252
zipp==3.17.0
76-
tiktoken==0.5.1
253+
# via
254+
# -r requirements.in
255+
# importlib-metadata
256+
257+
# The following packages are considered to be unsafe in a requirements file:
258+
# pip
259+
# setuptools

0 commit comments

Comments
 (0)