Skip to content

Commit 107f609

Browse files
committed
Create a script for updating packages
1 parent 293216f commit 107f609

File tree

7 files changed

+22
-39
lines changed

7 files changed

+22
-39
lines changed

src/fastapi_app/fast_update_hd_data.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
EMBEDDING_FIELDS = [
3131
'package_name', 'package_picture', 'url', 'installment_month', 'installment_limit',
32-
'price_to_reserve_for_this_package', 'shop_name', 'category', 'category_tags',
32+
'shop_name', 'category', 'category_tags',
3333
'preview_1_10', 'selling_point', 'meta_keywords', 'brand', 'min_max_age',
3434
'locations', 'meta_description', 'price_details', 'package_details',
3535
'important_info', 'payment_booking_info', 'general_info', 'early_signs_for_diagnosis',
@@ -125,9 +125,9 @@ async def seed_and_update_embeddings(engine):
125125

126126
for url, record in tqdm(new_records.items(), desc="Processing new records"):
127127
try:
128-
record["id"] = convert_to_int(record.get("id"))
129128
record["price"] = convert_to_float(record.get("price"))
130129
record["cash_discount"] = convert_to_float(record.get("cash_discount"))
130+
record["price_to_reserve_for_this_package"] = convert_to_float(record.get("price_to_reserve_for_this_package"))
131131
record["brand_ranking_position"] = convert_to_int(record.get("brand_ranking_position"))
132132

133133
if record["price"] is None:
@@ -150,7 +150,7 @@ async def seed_and_update_embeddings(engine):
150150
item_data[f'embedding_{field}'] = None
151151

152152
for key, value in item_data.items():
153-
if key not in ["id", "price", "cash_discount", "brand_ranking_position"]:
153+
if key not in ["price", "cash_discount", "price_to_reserve_for_this_package", "brand_ranking_position"]:
154154
item_data[key] = convert_to_str(value)
155155

156156
item = Item(**item_data)
@@ -169,9 +169,9 @@ async def seed_and_update_embeddings(engine):
169169
embedding_dimensions=openai_embed_dimensions,
170170
)
171171
setattr(item, f'embedding_{field}', embedding)
172-
logger.info(f"Updated embedding for {field} of item {item.id}")
172+
logger.info(f"Updated embedding for {field} of item {item.url}")
173173
except Exception as e:
174-
logger.error(f"Error updating embedding for {field} of item {item.id}: {e}")
174+
logger.error(f"Error updating embedding for {field} of item {item.url}: {e}")
175175

176176
session.merge(item)
177177
await session.commit()

src/fastapi_app/postgres_models.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,15 @@ class Base(DeclarativeBase, MappedAsDataclass):
1313

1414

1515
class Item(Base):
16-
__tablename__ = "packages"
17-
id: Mapped[int] = mapped_column(primary_key=True)
16+
__tablename__ = "packages_all"
1817
package_name: Mapped[str] = mapped_column()
1918
package_picture: Mapped[str] = mapped_column()
20-
url: Mapped[str] = mapped_column()
19+
url: Mapped[str] = mapped_column(primary_key=True)
2120
price: Mapped[float] = mapped_column()
2221
cash_discount: Mapped[float] = mapped_column()
2322
installment_month: Mapped[str] = mapped_column()
2423
installment_limit: Mapped[str] = mapped_column()
25-
price_to_reserve_for_this_package: Mapped[str] = mapped_column()
24+
price_to_reserve_for_this_package: Mapped[float] = mapped_column()
2625
shop_name: Mapped[str] = mapped_column()
2726
category: Mapped[str] = mapped_column()
2827
category_tags: Mapped[str] = mapped_column()
@@ -59,7 +58,6 @@ class Item(Base):
5958
embedding_url: Mapped[Vector] = mapped_column(Vector(1536))
6059
embedding_installment_month: Mapped[Vector] = mapped_column(Vector(1536))
6160
embedding_installment_limit: Mapped[Vector] = mapped_column(Vector(1536))
62-
embedding_price_to_reserve_for_this_package: Mapped[Vector] = (mapped_column(Vector(1536)))
6361
embedding_shop_name: Mapped[Vector] = mapped_column(Vector(1536))
6462
embedding_category: Mapped[Vector] = mapped_column(Vector(1536))
6563
embedding_category_tags: Mapped[Vector] = mapped_column(Vector(1536))
@@ -100,7 +98,6 @@ def to_dict(self, include_embedding: bool = False):
10098
"embedding_url",
10199
"embedding_installment_month",
102100
"embedding_installment_limit",
103-
"embedding_price_to_reserve_for_this_package",
104101
"embedding_shop_name",
105102
"embedding_category",
106103
"embedding_category_tags",
@@ -142,7 +139,6 @@ def to_dict(self, include_embedding: bool = False):
142139
"embedding_url",
143140
"embedding_installment_month",
144141
"embedding_installment_limit",
145-
"embedding_price_to_reserve_for_this_package",
146142
"embedding_shop_name",
147143
"embedding_category",
148144
"embedding_category_tags",
@@ -245,9 +241,6 @@ def to_str_for_embedding_installment_month(self):
245241
def to_str_for_embedding_installment_limit(self):
246242
return f"Installment Limit: {self.installment_limit}" if self.installment_limit else ""
247243

248-
def to_str_for_embedding_price_to_reserve_for_this_package(self):
249-
return f"Price to Reserve for This Package: {self.price_to_reserve_for_this_package}" if self.price_to_reserve_for_this_package else ""
250-
251244
def to_str_for_embedding_shop_name(self):
252245
return f"Shop Name: {self.shop_name}" if self.shop_name else ""
253246

@@ -376,13 +369,6 @@ def to_str_for_embedding_faq(self):
376369
postgresql_with={"m": 16, "ef_construction": 64},
377370
postgresql_ops={"embedding_installment_limit": "vector_ip_ops"},
378371
),
379-
Index(
380-
"hnsw_index_for_embedding_price_to_reserve_for_this_package",
381-
Item.embedding_price_to_reserve_for_this_package,
382-
postgresql_using="hnsw",
383-
postgresql_with={"m": 16, "ef_construction": 64},
384-
postgresql_ops={"embedding_price_to_reserve_for_this_package": "vector_ip_ops"},
385-
),
386372
Index(
387373
"hnsw_index_for_embedding_shop_name",
388374
Item.embedding_shop_name,

src/fastapi_app/postgres_searcher.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ async def hybrid_search(
5454
COALESCE(embedding_url <=> :embedding, 1),
5555
COALESCE(embedding_installment_month <=> :embedding, 1),
5656
COALESCE(embedding_installment_limit <=> :embedding, 1),
57-
COALESCE(embedding_price_to_reserve_for_this_package <=> :embedding, 1),
5857
COALESCE(embedding_shop_name <=> :embedding, 1),
5958
COALESCE(embedding_category <=> :embedding, 1),
6059
COALESCE(embedding_category_tags <=> :embedding, 1),
@@ -107,7 +106,6 @@ async def hybrid_search(
107106
to_tsvector('thai', COALESCE(url, '')) ||
108107
to_tsvector('thai', COALESCE(installment_month, '')) ||
109108
to_tsvector('thai', COALESCE(installment_limit, '')) ||
110-
to_tsvector('thai', COALESCE(price_to_reserve_for_this_package, '')) ||
111109
to_tsvector('thai', COALESCE(shop_name, '')) ||
112110
to_tsvector('thai', COALESCE(category, '')) ||
113111
to_tsvector('thai', COALESCE(category_tags, '')) ||
@@ -145,7 +143,6 @@ async def hybrid_search(
145143
to_tsvector('thai', COALESCE(url, '')) ||
146144
to_tsvector('thai', COALESCE(installment_month, '')) ||
147145
to_tsvector('thai', COALESCE(installment_limit, '')) ||
148-
to_tsvector('thai', COALESCE(price_to_reserve_for_this_package, '')) ||
149146
to_tsvector('thai', COALESCE(shop_name, '')) ||
150147
to_tsvector('thai', COALESCE(category, '')) ||
151148
to_tsvector('thai', COALESCE(category_tags, '')) ||
@@ -183,7 +180,6 @@ async def hybrid_search(
183180
to_tsvector('thai', COALESCE(url, '')) ||
184181
to_tsvector('thai', COALESCE(installment_month, '')) ||
185182
to_tsvector('thai', COALESCE(installment_limit, '')) ||
186-
to_tsvector('thai', COALESCE(price_to_reserve_for_this_package, '')) ||
187183
to_tsvector('thai', COALESCE(shop_name, '')) ||
188184
to_tsvector('thai', COALESCE(category, '')) ||
189185
to_tsvector('thai', COALESCE(category_tags, '')) ||

src/fastapi_app/prompts/query.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.
2-
You have access to an Azure PostgreSQL database with an packages table that has columns for id, package_name, package_picture, url, price, cash_discount, installment_month, installment_limit, price_to_reserve_for_this_package, shop_name, category, category_tags, preview_1_10, selling_point, meta_keywords, brand, min_max_age, locations, meta_description, price_details, package_details, important_info, payment_booking_info, general_info, early_signs_for_diagnosis, how_to_diagnose, hdcare_summary, common_question, know_this_disease, courses_of_action, signals_to_proceed_surgery, get_to_know_this_surgery, comparisons, getting_ready, recovery, side_effects, review_4_5_stars, brand_option_in_thai_name, brand_ranking_position and faq.
2+
You have access to an Azure PostgreSQL database with a packages table that has columns for id, package_name, package_picture, url, price, cash_discount, installment_month, installment_limit,
3+
shop_name, category, category_tags, preview_1_10, selling_point, meta_keywords, brand, min_max_age, locations, meta_description, price_details, package_details, important_info, payment_booking_info, general_info, early_signs_for_diagnosis, how_to_diagnose, hdcare_summary, common_question, know_this_disease, courses_of_action, signals_to_proceed_surgery, get_to_know_this_surgery, comparisons, getting_ready, recovery, side_effects, review_4_5_stars, brand_option_in_thai_name, brand_ranking_position and faq.
34
Generate a search query based on the conversation and the new question.
45
If the question is not in Thai, translate the question to Thai before generating the search query.
56
If you cannot generate a search query, return the original user question.

src/fastapi_app/rag_advanced.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ async def hybrid_search(self, messages, top, vector_search, text_search):
7373
filters=filters,
7474
)
7575

76-
sources_content = [f"[{(item.id)}]:{item.to_str_for_broad_rag()}\n\n" for item in results]
76+
sources_content = [f"[{(item.url)}]:{item.to_str_for_broad_rag()}\n\n" for item in results]
7777

7878
thought_steps = [
7979
ThoughtStep(
@@ -134,7 +134,7 @@ async def run(
134134
results = await self.searcher.simple_sql_search(filters=specify_package_filters)
135135

136136
if results:
137-
sources_content = [f"[{(item.id)}]:{item.to_str_for_narrow_rag()}\n\n" for item in results]
137+
sources_content = [f"[{(item.url)}]:{item.to_str_for_narrow_rag()}\n\n" for item in results]
138138

139139
thought_steps = [
140140
ThoughtStep(

src/fastapi_app/seed_hd_data.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
embedding_fields = [
2323
'embedding_package_name', 'embedding_package_picture', 'embedding_url',
2424
'embedding_installment_month', 'embedding_installment_limit',
25-
'embedding_price_to_reserve_for_this_package', 'embedding_shop_name',
25+
'embedding_shop_name',
2626
'embedding_category', 'embedding_category_tags', 'embedding_preview_1_10',
2727
'embedding_selling_point', 'embedding_meta_keywords', 'embedding_brand',
2828
'embedding_min_max_age', 'embedding_locations', 'embedding_meta_description',
@@ -88,9 +88,9 @@ async def seed_data(engine):
8888
logger.info("Starting to insert records into the database...")
8989
for record in tqdm(records, desc="Inserting records"):
9090
try:
91-
record["id"] = convert_to_int(record["id"])
92-
if record["id"] is None:
93-
logger.error(f"Skipping record with invalid id: {record}")
91+
record["url"] = convert_to_int(record["url"])
92+
if record["url"] is None:
93+
logger.error(f"Skipping record with invalid url: {record}")
9494
continue
9595

9696
if "price" in record:
@@ -104,7 +104,7 @@ async def seed_data(engine):
104104
logger.error(f"Skipping record with invalid numeric fields: {record}")
105105
continue
106106

107-
item = await session.execute(select(Item).filter(Item.id == record["id"]))
107+
item = await session.execute(select(Item).filter(Item.url == record["url"]))
108108
if item.scalars().first():
109109
continue
110110

@@ -114,14 +114,14 @@ async def seed_data(engine):
114114
item_data[field] = None
115115

116116
for key, value in item_data.items():
117-
if key not in ["id", "price", "cash_discount", "brand_ranking_position"]:
117+
if key not in ["price", "cash_discount", "brand_ranking_position"]:
118118
item_data[key] = convert_to_str(value)
119119

120120
item = Item(**item_data)
121121
session.add(item)
122122

123123
except Exception as e:
124-
logger.error(f"Error inserting record with id {record['id']}: {e}")
124+
logger.error(f"Error inserting record with url {record['url']}: {e}")
125125
await session.rollback()
126126
continue
127127

src/fastapi_app/update_embeddings.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
EMBEDDING_FIELDS = [
2222
'package_name', 'package_picture', 'url', 'installment_month', 'installment_limit',
23-
'price_to_reserve_for_this_package', 'shop_name', 'category', 'category_tags',
23+
'shop_name', 'category', 'category_tags',
2424
'preview_1_10', 'selling_point', 'meta_keywords', 'brand', 'min_max_age',
2525
'locations', 'meta_description','price_details', 'package_details', 'important_info',
2626
'payment_booking_info', 'general_info', 'early_signs_for_diagnosis', 'how_to_diagnose',
@@ -57,9 +57,9 @@ async def update_embeddings():
5757
embedding_dimensions=openai_embed_dimensions,
5858
)
5959
setattr(item, f'embedding_{field}', embedding)
60-
logger.info(f"Updated embedding for {field} of item {item.id}")
60+
logger.info(f"Updated embedding for {field} of item {item.url}")
6161
except Exception as e:
62-
logger.error(f"Error updating embedding for {field} of item {item.id}: {e}")
62+
logger.error(f"Error updating embedding for {field} of item {item.url}: {e}")
6363

6464
session.add(item)
6565
await session.commit()

0 commit comments

Comments
 (0)