Skip to content

Commit 76c2a3d

Browse files
authored
Merge pull request #3 from wafflestudio/feature/add-transaction
Feature/add transaction
2 parents 7cd6d91 + 4d83651 commit 76c2a3d

File tree

1 file changed

+119
-29
lines changed

1 file changed

+119
-29
lines changed

handler.py

Lines changed: 119 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,114 @@
11
import json
22
import pymysql
33
import os
4-
import random
4+
import datetime
5+
from pytz import timezone
6+
from itertools import compress
57
from slack import send_slack_message
6-
from menu_crawler import VetRestaurantCrawler, GraduateDormRestaurantCrawler, SnucoRestaurantCrawler
8+
from menu_crawler import text_normalizer, VetRestaurantCrawler, GraduateDormRestaurantCrawler, SnucoRestaurantCrawler
9+
10+
11+
def compare_restaurants(db_restaurants, crawled_meals):
    """Return the crawled restaurants whose code is not among the stored ones.

    Args:
        db_restaurants: Iterable of mappings; each is read with
            ``.get('code')``.
        crawled_meals: Iterable of meal objects exposing a ``restaurant``
            attribute. The code of each is computed with
            ``text_normalizer(meal.restaurant, True)``.

    Returns:
        A list of ``{'code': ..., 'name_kr': ...}`` dicts, one per code that
        is not already known, in first-seen order and without duplicates.
    """
    # A set gives O(1) membership tests; the crawl yields many meals per
    # restaurant, so the same codes are looked up over and over.
    known_codes = {restaurant.get('code') for restaurant in db_restaurants}
    new_restaurants = []
    for meal in crawled_meals:
        code = text_normalizer(meal.restaurant, True)
        if code in known_codes:
            continue
        new_restaurants.append(dict(
            code=code,
            name_kr=meal.restaurant,
        ))
        # Remember the code so later meals of the same restaurant are skipped.
        known_codes.add(code)
    return new_restaurants
23+
24+
25+
def compare_menus(db_menus, crawled_meals, restaurants):
    """Diff the stored menus against the freshly crawled meals.

    Each crawled meal is converted with ``as_dict()`` and reshaped in place:
    ``restaurant`` is replaced by ``restaurant_id`` (looked up through the
    normalized restaurant code) and ``name`` is replaced by ``name_kr`` plus
    a normalized ``code``. A stored menu and a crawled menu match when they
    are equal on restaurant_id, code, date, type, price and etc.

    Args:
        db_menus: Sequence of stored menu mappings.
        crawled_meals: Iterable of meal objects providing ``as_dict()``.
        restaurants: Iterable of mappings with ``code`` and ``id`` entries.

    Returns:
        A tuple ``(new_menus, deleted_menus)``: crawled menus with no stored
        counterpart, and stored menus with no crawled counterpart.
    """
    match_fields = ['restaurant_id', 'code', 'date', 'type', 'price', 'etc']
    id_by_code = {row.get('code'): row.get('id') for row in restaurants}

    crawled_menus = [meal.as_dict() for meal in crawled_meals]
    for crawled in crawled_menus:
        # Keep this key order: the dicts are repr()-ed by the callers.
        restaurant_code = text_normalizer(crawled.pop('restaurant'), True)
        crawled['restaurant_id'] = id_by_code.get(restaurant_code)
        menu_name = crawled.pop('name')
        crawled['name_kr'] = menu_name
        crawled['code'] = text_normalizer(menu_name, True)

    db_unmatched = [True] * len(db_menus)
    crawled_unmatched = [True] * len(crawled_menus)
    for db_pos, stored in enumerate(db_menus):
        for crawled_pos, crawled in enumerate(crawled_menus):
            same = all(
                stored.get(field, None) == crawled.get(field)
                for field in match_fields
            )
            if not same:
                continue
            # No early exit: every matching pair must be flagged on both sides.
            db_unmatched[db_pos] = False
            crawled_unmatched[crawled_pos] = False

    new_menus = list(compress(crawled_menus, crawled_unmatched))
    deleted_menus = list(compress(db_menus, db_unmatched))
    return new_menus, deleted_menus
44+
45+
46+
def restaurants_transaction(crawled_meals, cursor):
    """Insert restaurants that appear in the crawl but not in ``restaurant``.

    Reads every stored restaurant code, asks ``compare_restaurants`` which
    crawled restaurants are missing, reports them to Slack and inserts them.
    Nothing is committed here.

    Args:
        crawled_meals: Meal objects accepted by ``compare_restaurants``.
        cursor: Database cursor used for the SELECT and the bulk INSERT.
    """
    cursor.execute("""
        SELECT code
        FROM restaurant;
    """)
    stored_restaurants = cursor.fetchall()

    new_restaurants = compare_restaurants(stored_restaurants, crawled_meals)
    print(f"New Restaurants: {repr(new_restaurants)}")

    if new_restaurants:
        # Each name is wrapped in double quotes and followed by one space.
        quoted_names = [
            '"' + restaurant.get('name_kr') + '" '
            for restaurant in new_restaurants
        ]
        send_slack_message("New Restaurant(s) Found: " + "".join(quoted_names))
        cursor.executemany(
            """
            INSERT INTO restaurant(code, name_kr)
            VALUES (%(code)s, %(name_kr)s);
            """,
            new_restaurants,
        )

    print("Restaurants checked")
66+
67+
68+
def menus_transaction(crawled_meals, cursor):
    """Bring the ``menu`` table (today onwards) in line with the crawl.

    Loads the restaurants and the stored menus dated today (Asia/Seoul) or
    later, diffs them against ``crawled_meals`` with ``compare_menus``,
    deletes the stored menus that have no crawled counterpart and inserts
    the crawled menus that have no stored counterpart. Deletions, and new
    menus whose name contains ``:``, are reported to Slack. Nothing is
    committed here.

    Args:
        crawled_meals: Meal objects accepted by ``compare_menus``.
        cursor: Database cursor; rows are read with ``.get``, so it must
            return mappings.
    """
    get_restaurants_query = """
        SELECT id, code
        FROM restaurant;
    """
    cursor.execute(get_restaurants_query)
    restaurants = cursor.fetchall()

    today = datetime.datetime.now(timezone('Asia/Seoul')).date()
    # Values are bound by the driver rather than interpolated into the SQL
    # text, matching the %(name)s style of the INSERT statements.
    get_menus_query = """
        SELECT id, restaurant_id, code, date, type, price, etc
        FROM menu
        WHERE date>=%(today)s;
    """
    cursor.execute(get_menus_query, {'today': today.isoformat()})
    db_menus = cursor.fetchall()

    new_menus, deleted_menus = compare_menus(db_menus, crawled_meals, restaurants)

    print(f"Deleted Menus: {repr(deleted_menus)}")
    if deleted_menus:
        send_slack_message(f"Deleted Menus: {repr(deleted_menus)}")
        deleted_menus_id = [menu.get('id') for menu in deleted_menus]
        # Only the placeholder list is formatted in; the ids themselves are
        # passed as parameters. The list is non-empty inside this branch.
        placeholders = ','.join(['%s'] * len(deleted_menus_id))
        delete_menus_query = f"""
            DELETE FROM menu
            WHERE id in ({placeholders});
        """
        cursor.execute(delete_menus_query, deleted_menus_id)

    print(f"New Menus: {repr(new_menus)}")
    # Names containing ':' are flagged for a manual look before insertion.
    new_menus_to_check = [menu for menu in new_menus if ':' in menu.get('name_kr')]
    if new_menus_to_check:
        send_slack_message(f"New Menus to be Checked: {repr(new_menus_to_check)}")
    insert_menus_query = """
        INSERT INTO menu(restaurant_id, code, date, type, name_kr, price, etc)
        VALUES (%(restaurant_id)s, %(code)s, %(date)s, %(type)s, %(name_kr)s, %(price)s, %(etc)s);
    """
    cursor.executemany(insert_menus_query, new_menus)

    print("Menus checked")
107+
7108

8109
def crawl(event, context):
9110
try:
111+
print("start crawling")
10112
siksha_db = pymysql.connect(
11113
user=os.environ.get('DB_USER', 'root'),
12114
passwd=os.environ.get('DB_PASSWORD', 'waffle'),
@@ -15,35 +117,23 @@ def crawl(event, context):
15117
charset='utf8'
16118
)
17119
cursor = siksha_db.cursor(pymysql.cursors.DictCursor)
18-
# TRANSACTION START
19-
get_restaurants_query = """
20-
SELECT *
21-
FROM restaurant
22-
"""
23-
cursor.execute(get_restaurants_query)
24-
restaurants = cursor.fetchall()
25-
print('log using stdout')
26-
print(f'get restaurants result: {repr(restaurants)}')
27-
insert_restaurants_query = """
28-
INSERT INTO restaurant(code, name_kr, name_en, addr, lat, lng)
29-
VALUES (%(code)s, %(name_kr)s, %(name_en)s, %(addr)s, %(lat)s, %(lng)s);
30-
"""
31-
new_restaurants = [
32-
dict(
33-
code=f"test{random.random()}",
34-
name_kr="한글명",
35-
name_en="영어명",
36-
addr="한글주소",
37-
lat=0,
38-
lng=0
39-
) for i in range(10)
40-
]
41-
cursor.executemany(insert_restaurants_query, new_restaurants)
42-
# TRANSACTION END
120+
121+
crawled_meals = VetRestaurantCrawler().run_30days() \
122+
+ GraduateDormRestaurantCrawler().run_30days() \
123+
+ SnucoRestaurantCrawler().run_30days()
124+
today = datetime.datetime.now(timezone('Asia/Seoul')).date()
125+
crawled_meals = list(filter(lambda meal: meal.date >= today, crawled_meals))
126+
restaurants_transaction(crawled_meals, cursor)
127+
siksha_db.commit()
128+
menus_transaction(crawled_meals, cursor)
43129
siksha_db.commit()
44-
send_slack_message("crawling has been successfully done")
45-
return "crawling has been successfully done"
130+
131+
send_slack_message("Crawling has been successfully done")
132+
return "Crawling has been successfully done"
46133
except:
47134
siksha_db.rollback()
48135
send_slack_message("crawling has been failed")
49136
return "crawling has been failed"
137+
138+
139+
#crawl(None, None)

0 commit comments

Comments
 (0)