Skip to content

Commit 8f41ec0

Browse files
authored
Bump stable version to 0.3.0 (#33)
* reconstruct code * show aff;fix latex replace bug; * remove pdb * checkout to specific ref * update readme * fix invalid tar * Retreive arxiv paper from Atom feed (#31) * retrieve from rss * fix bug * fix bug * fix bug * clean code * Release v0.3.0 (#32) * bump version to 0.3.0 * update readme * update uv.lock
1 parent e36b35f commit 8f41ec0

File tree

10 files changed

+444
-334
lines changed

10 files changed

+444
-334
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ dist/
66
wheels/
77
.vscode/
88
*.egg-info
9+
.env
910

1011
# Virtual environments
1112
.venv

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
## ✨ Features
3434
- Totally free! All the calculation can be done in the Github Action runner locally within its quota (for public repo).
3535
- AI-generated TL;DR for you to quickly pick up target papers.
36+
- Affiliations of the paper are resolved and presented.
3637
- Links of PDF and code implementation (if any) presented in the e-mail.
3738
- List of papers sorted by relevance with your recent research interest.
3839
- Fast deployment by forking this repo and setting environment variables in the Github Action Page.
@@ -56,7 +57,7 @@ Below are all the secrets you need to set. They are invisible to anyone includin
5657
| :--- | :---: | :--- | :--- | :--- |
5758
| ZOTERO_ID || str | User ID of your Zotero account. Get your ID from [here](https://www.zotero.org/settings/security). | 12345678 |
5859
| ZOTERO_KEY || str | A Zotero API key with read access. Get a key from [here](https://www.zotero.org/settings/security). | AB5tZ877P2j7Sm2Mragq041H |
59-
| ARXIV_QUERY || str | The search query for retrieving arxiv papers. Refer to the [official document](https://info.arxiv.org/help/api/user-manual.html#query_details) for details. The example queries papers about AI, CV, NLP, ML. Find the abbr of your research area from [here](https://arxiv.org/category_taxonomy). | cat:cs.AI OR cat:cs.CV OR cat:cs.LG OR cat:cs.CL |
60+
| ARXIV_QUERY || str | The categories of target arxiv papers. Use `+` to concatenate multiple categories. The example retrieves papers about AI, CV, NLP, ML. Find the abbr of your research area from [here](https://arxiv.org/category_taxonomy). | cs.AI+cs.CV+cs.LG+cs.CL |
6061
| SMTP_SERVER || str | The SMTP server that sends the email. I recommend using a seldom-used email account for this. Ask your email provider (Gmail, QQ, Outlook, ...) for its SMTP server. | smtp.qq.com |
6162
| SMTP_PORT || int | The port of SMTP server. | 465 |
6263
| SENDER || str | The email account of the SMTP server that sends you email. | [email protected] |
@@ -118,6 +119,9 @@ The TLDR of each paper is generated by a lightweight LLM (Qwen2.5-3b-instruct-q4
118119
- The recommendation algorithm is very simple, it may not accurately reflect your interest. Welcome better ideas for improving the algorithm!
119120
- This workflow deploys an LLM on the CPU of the Github Action runner, and it takes about 70s to generate a TLDR for one paper. A high `MAX_PAPER_NUM` can cause the execution time to exceed the limit of the Github Action runner (6h per execution for public repos, and 2000 mins per month for private repos). In general, the quota given to public repos is more than enough for individual use. If you have special requirements, you can deploy the workflow on your own server, use a self-hosted Github Action runner, or pay for the extra execution time.
120121

122+
## 👯‍♂️ Contribution
123+
Issues and PRs are welcome! But remember that **each PR should be merged into the `dev` branch**.
124+
121125
## 📃 License
122126
Distributed under the AGPLv3 License. See `LICENSE` for details.
123127

construct_email.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
1-
import arxiv
1+
from paper import ArxivPaper
22
import math
3+
from tqdm import tqdm
4+
from email.header import Header
5+
from email.mime.text import MIMEText
6+
from email.utils import parseaddr, formataddr
7+
import smtplib
8+
import datetime
9+
310
framework = """
411
<!DOCTYPE HTML>
512
<html>
@@ -49,7 +56,7 @@ def get_empty_html():
4956
"""
5057
return block_template
5158

52-
def get_block_html(title:str, authors:str, rate:str,arxiv_id:str, abstract:str, pdf_url:str, code_url:str=None):
59+
def get_block_html(title:str, authors:str, rate:str,arxiv_id:str, abstract:str, pdf_url:str, code_url:str=None, affiliations:str=None):
5360
code = f'<a href="{code_url}" style="display: inline-block; text-decoration: none; font-size: 14px; font-weight: bold; color: #fff; background-color: #5bc0de; padding: 8px 16px; border-radius: 4px; margin-left: 8px;">Code</a>' if code_url else ''
5461
block_template = """
5562
<table border="0" cellpadding="0" cellspacing="0" width="100%" style="font-family: Arial, sans-serif; border: 1px solid #ddd; border-radius: 8px; padding: 16px; background-color: #f9f9f9;">
@@ -61,6 +68,8 @@ def get_block_html(title:str, authors:str, rate:str,arxiv_id:str, abstract:str,
6168
<tr>
6269
<td style="font-size: 14px; color: #666; padding: 8px 0;">
6370
{authors}
71+
<br>
72+
<i>{affiliations}</i>
6473
</td>
6574
</tr>
6675
<tr>
@@ -87,7 +96,7 @@ def get_block_html(title:str, authors:str, rate:str,arxiv_id:str, abstract:str,
8796
</tr>
8897
</table>
8998
"""
90-
return block_template.format(title=title, authors=authors,rate=rate,arxiv_id=arxiv_id, abstract=abstract, pdf_url=pdf_url, code=code)
99+
return block_template.format(title=title, authors=authors,rate=rate,arxiv_id=arxiv_id, abstract=abstract, pdf_url=pdf_url, code=code, affiliations=affiliations)
91100

92101
def get_stars(score:float):
93102
full_star = '<span class="full-star">⭐</span>'
@@ -106,25 +115,46 @@ def get_stars(score:float):
106115
return '<div class="star-wrapper">'+full_star * full_star_num + half_star * half_star_num + '</div>'
107116

108117

109-
def render_email(papers:list[arxiv.Result]):
118+
def render_email(papers:list[ArxivPaper]):
110119
parts = []
111120
if len(papers) == 0 :
112121
return framework.replace('__CONTENT__', get_empty_html())
113122

114-
for p in papers:
115-
# crop the abstract
116-
'''
117-
summary = p.summary
118-
summary = summary[:min(600, len(summary))]
119-
if len(summary) == 600:
120-
summary += '...'
121-
'''
123+
for p in tqdm(papers,desc='Rendering Email'):
122124
rate = get_stars(p.score)
123125
authors = [a.name for a in p.authors[:5]]
124126
authors = ', '.join(authors)
125127
if len(p.authors) > 5:
126128
authors += ', ...'
127-
parts.append(get_block_html(p.title, authors,rate,p.arxiv_id ,p.tldr, p.pdf_url, p.code_url))
129+
if p.affiliations is not None:
130+
affiliations = p.affiliations[:5]
131+
affiliations = ', '.join(affiliations)
132+
if len(p.affiliations) > 5:
133+
affiliations += ', ...'
134+
else:
135+
affiliations = 'Unknown Affiliation'
136+
parts.append(get_block_html(p.title, authors,rate,p.arxiv_id ,p.tldr, p.pdf_url, p.code_url, affiliations))
128137

129138
content = '<br>' + '</br><br>'.join(parts) + '</br>'
130139
return framework.replace('__CONTENT__', content)
140+
141+
def send_email(sender:str, receiver:str, password:str,smtp_server:str,smtp_port:int, html:str,):
    """Send the rendered HTML digest to a single receiver over SMTP.

    The message is sent as UTF-8 HTML with the display names
    ``Github Action`` (From) and ``You`` (To) and the subject
    ``Daily arXiv <YYYY/MM/DD>``, where the date comes from
    ``datetime.datetime.now()``.

    A plain SMTP connection upgraded with STARTTLS is tried first; if the
    server disconnects during that attempt, the function falls back to an
    implicit-TLS connection (``smtplib.SMTP_SSL``) on the same host/port.

    Args:
        sender: Address used both as the From address and the SMTP login name.
        receiver: Destination address.
        password: Password passed to ``SMTP.login`` for ``sender``.
        smtp_server: SMTP host name.
        smtp_port: SMTP port.
        html: HTML body of the message.

    Raises:
        smtplib.SMTPException / OSError: Propagated from connecting,
            logging in, or sending; the connection is closed before the
            exception leaves this function.
    """
    def _format_addr(s):
        # Encode the display name so non-ASCII names survive in the header.
        name, addr = parseaddr(s)
        return formataddr((Header(name, 'utf-8').encode(), addr))

    msg = MIMEText(html, 'html', 'utf-8')
    msg['From'] = _format_addr('Github Action <%s>' % sender)
    msg['To'] = _format_addr('You <%s>' % receiver)
    today = datetime.datetime.now().strftime('%Y/%m/%d')
    msg['Subject'] = Header(f'Daily arXiv {today}', 'utf-8').encode()

    try:
        server = smtplib.SMTP(smtp_server, smtp_port)
        try:
            server.starttls()
        except Exception:
            # Do not leak the half-open plain connection when the upgrade fails.
            server.close()
            raise
    except smtplib.SMTPServerDisconnected:
        # The server dropped the plain/STARTTLS attempt; retry with implicit TLS.
        server = smtplib.SMTP_SSL(smtp_server, smtp_port)

    try:
        server.login(sender, password)
        server.sendmail(sender, [receiver], msg.as_string())
        server.quit()
    finally:
        # close() is a no-op after a successful quit(); on failure it releases
        # the socket without masking the original exception.
        server.close()

llm.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from llama_cpp import Llama
2+
from openai import OpenAI
3+
from loguru import logger
4+
5+
# Module-wide LLM instance shared by all callers. It stays None until
# set_global_llm() assigns it; get_llm() calls set_global_llm() on first use
# when nothing has been configured yet.
GLOBAL_LLM = None
6+
7+
class LLM:
    """Chat-completion wrapper backed by either an OpenAI client or a local llama.cpp model.

    When an API key is supplied, requests go through ``openai.OpenAI``;
    otherwise the Qwen2.5-3B-Instruct GGUF model is loaded via ``llama_cpp``.
    """

    def __init__(self, api_key: str = None, base_url: str = None, model: str = None):
        """Create the backend.

        Args:
            api_key: If truthy, an ``OpenAI`` client is built with it.
            base_url: Base URL forwarded to the ``OpenAI`` client.
            model: Model name sent with each request on the ``OpenAI`` path;
                not used by the local backend.
        """
        self.model = model
        if not api_key:
            # No key given: fall back to the bundled local model.
            self.llm = Llama.from_pretrained(
                repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
                filename="qwen2.5-3b-instruct-q4_k_m.gguf",
                n_ctx=32_000,
                n_threads=4,
                verbose=False,
            )
            return
        self.llm = OpenAI(api_key=api_key, base_url=base_url)

    def generate(self, messages: list[dict]) -> str:
        """Run one chat completion at temperature 0 and return the reply text.

        Args:
            messages: Chat messages passed unchanged to the backend.

        Returns:
            The content of the first choice's message.
        """
        if not isinstance(self.llm, OpenAI):
            # llama.cpp returns a plain dict.
            completion = self.llm.create_chat_completion(messages=messages, temperature=0)
            return completion["choices"][0]["message"]["content"]

        completion = self.llm.chat.completions.create(
            messages=messages,
            temperature=0,
            model=self.model,
        )
        return completion.choices[0].message.content
28+
29+
def set_global_llm(api_key: str = None, base_url: str = None, model: str = None):
    """Build an ``LLM`` from the given settings and install it as ``GLOBAL_LLM``.

    Args:
        api_key: Forwarded to ``LLM``.
        base_url: Forwarded to ``LLM``.
        model: Forwarded to ``LLM``.
    """
    global GLOBAL_LLM
    instance = LLM(api_key=api_key, base_url=base_url, model=model)
    GLOBAL_LLM = instance
32+
33+
def get_llm() -> LLM:
    """Return the shared ``LLM``, creating a default one on first use.

    Returns:
        The instance stored in ``GLOBAL_LLM``.
    """
    if GLOBAL_LLM is not None:
        return GLOBAL_LLM

    # Nothing configured yet: log it and build the default instance.
    logger.info("No global LLM found, creating a default one. Use `set_global_llm` to set a custom one.")
    set_global_llm()
    return GLOBAL_LLM

0 commit comments

Comments
 (0)