Skip to content

Commit 3eb9cd8

Browse files
authored
Update rss-to-discord.yml
1 parent 1fa8c54 commit 3eb9cd8

File tree

1 file changed

+109
-45
lines changed

1 file changed

+109
-45
lines changed
Lines changed: 109 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,35 @@
1-
name: Post new Hugo articles to Discord
1+
name: "RSS → Discord (Hugo, com estado)"

on:
  # Run when site content changes ...
  push:
    paths:
      - "content/**"
  # ... and poll the feed every 15 minutes (cron schedules are in UTC).
  schedule:
    - cron: "*/15 * * * *"
  # Manual runs can force a re-post or point at a different feed.
  workflow_dispatch:
    inputs:
      force_latest:
        description: "Repostar o item mais recente (ignora estado)?"
        required: false
        # Boolean inputs render as a checkbox and still reach the script
        # as the strings "true"/"false" through github.event.inputs.
        type: boolean
        default: false
      rss_url:
        description: "Override do feed (ex.: https://cvehunters.com/index.xml)"
        required: false
        type: string
        default: ""

# The job only needs to read the repository.
permissions:
  contents: read

# Push, schedule and manual triggers can overlap; serialize them so two runs
# never read the same posted-IDs state and publish the same entry twice.
concurrency:
  group: rss-to-discord
  cancel-in-progress: false
1019

1120
jobs:
12-
rss_to_discord:
21+
run:
1322
runs-on: ubuntu-latest
1423
steps:
15-
- name: Checkout
16-
uses: actions/checkout@v4
24+
- uses: actions/checkout@v4
25+
26+
# Restores the state file that holds the IDs of entries already posted.
# NOTE(review): the key below is a constant string; actions/cache skips its
# post-job save when the primary key hits exactly — confirm that the state
# file is really refreshed between runs.
27+
- name: Restore RSS state cache
28+
id: cache
29+
uses: actions/cache@v4
30+
with:
31+
path: .rss_state.json
32+
key: rss-state-v1
1733

1834
- name: Setup Python
1935
uses: actions/setup-python@v5
@@ -23,62 +39,110 @@ jobs:
2339
# Installs the two third-party packages the publishing script imports
# (feedparser to read the feed, requests to call the webhook).
- name: Install deps
2440
run: pip install feedparser requests
2541

26-
- name: Create script
42+
# The fixed-key restore step earlier in this job (key "rss-state-v1") can only
# ever return the first snapshot saved under that key, because actions/cache
# never overwrites an existing key. Restore the newest per-run snapshot here;
# on the very first run nothing matches and the older file is kept as a seed.
- name: Restore latest RSS state
  uses: actions/cache/restore@v4
  with:
    path: .rss_state.json
    # Unique per run, so this never hits exactly and falls through to the
    # prefix below, which selects the most recently saved snapshot.
    key: rss-state-v2-${{ github.run_id }}-${{ github.run_attempt }}
    restore-keys: |
      rss-state-v2-

- name: Publish to Discord
  env:
    DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
    HUGO_RSS_DEFAULT: https://cvehunters.com/index.xml
    # Inputs are passed through the environment (never interpolated into the
    # script text); both are empty on push and schedule triggers.
    RSS_OVERRIDE: ${{ github.event.inputs.rss_url }}
    FORCE_LATEST: ${{ github.event.inputs.force_latest }}
  run: |
    cat > rss_to_discord.py << 'PY'
    """Post feed entries that are not yet recorded in the state file to Discord."""
    import json
    import os
    import re
    import sys
    import time
    from html import unescape
    from pathlib import Path

    import feedparser
    import requests

    WEBHOOK = os.environ["DISCORD_WEBHOOK"]
    RSS_URL = os.environ.get("RSS_OVERRIDE") or os.environ.get("HUGO_RSS_DEFAULT", "")
    FORCE_LATEST = os.environ.get("FORCE_LATEST", "false").strip().lower() == "true"
    STATE_FILE = Path(".rss_state.json")

    TITLE_LIMIT = 256      # Discord rejects embed titles longer than this
    SUMMARY_LIMIT = 250    # same cut-off the summary always had
    MAX_RETRY_WAIT = 30.0  # upper bound (seconds) for a single rate-limit wait

    TAG_RE = re.compile(r"<[^>]+>")


    def load_state():
        """Return the set of already-posted entry IDs (empty when unreadable)."""
        if not STATE_FILE.exists():
            return set()
        try:
            data = json.loads(STATE_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError) as ex:
            # Keep going with an empty state, but say so instead of hiding it.
            print(f"[warn] could not read {STATE_FILE}: {ex}; starting empty")
            return set()
        if not isinstance(data, list):
            print(f"[warn] unexpected content in {STATE_FILE}; starting empty")
            return set()
        return {item for item in data if isinstance(item, str)}


    def save_state(ids):
        """Persist the posted IDs as a sorted JSON list."""
        STATE_FILE.write_text(json.dumps(sorted(ids)), encoding="utf-8")


    def entry_id(e):
        """Return a stable identifier for an entry: id, then guid, then link."""
        return e.get("id") or e.get("guid") or e.get("link")


    def clean_html(s):
        """Strip tags, decode entities, then collapse runs of whitespace."""
        text = unescape(TAG_RE.sub("", s or ""))
        # Collapse after decoding so entities such as &nbsp; are normalized too.
        return re.sub(r"\s+", " ", text).strip()


    def build_payload(e, prefix, default_title):
        """Return (title, webhook payload) for one feed entry."""
        title = (e.get("title") or default_title).strip()[:TITLE_LIMIT]
        url = e.get("link", "")
        raw = (e.get("summary_detail") or {}).get("value") or e.get("summary") or ""
        summary = clean_html(raw)[:SUMMARY_LIMIT]
        payload = {
            "content": f"{prefix} **{title}**\n{url}",
            "embeds": [{"title": title, "url": url, "description": summary}],
            # Never let feed text ping anyone.
            "allowed_mentions": {"parse": []},
        }
        return title, payload


    def retry_delay(resp):
        """Return how long to wait after a 429, clamped to a sane range."""
        try:
            delay = float(resp.headers.get("Retry-After", "1"))
        except ValueError:
            delay = 1.0
        # Clamp: the unit has differed between API versions, so never trust
        # a very large value blindly.
        return min(max(delay, 1.0), MAX_RETRY_WAIT)


    def deliver(payload, tag, title):
        """POST one payload; retry once when rate limited; raise on failure."""
        r = requests.post(WEBHOOK, json=payload, timeout=20)
        if r.status_code == 429:
            delay = retry_delay(r)
            print(f"[warn] rate limited, retrying in {delay:.1f}s")
            time.sleep(delay)
            r = requests.post(WEBHOOK, json=payload, timeout=20)
        print(f"[{tag}] {title} -> {r.status_code}")
        r.raise_for_status()


    if not RSS_URL:
        print("[erro] no feed URL configured")
        sys.exit(1)

    seen = load_state()
    feed = feedparser.parse(RSS_URL)
    if feed.get("bozo") and not feed.entries:
        print(f"[warn] feed could not be parsed: {feed.get('bozo_exception')}")
    print(f"[info] feed='{RSS_URL}', total entries={len(feed.entries)}, seen={len(seen)}")

    # Collect unseen entries; entry_id() already falls back to the link.
    to_post = []
    for e in feed.entries:
        eid = entry_id(e)
        if eid and eid not in seen:
            to_post.append((eid, e))

    # Feeds list newest first; publish oldest first.
    to_post.reverse()

    posted = 0
    try:
        for eid, e in to_post:
            title, payload = build_payload(e, "🆕", "Novo post")
            try:
                deliver(payload, "post", title)
            except Exception as ex:
                # Best effort: a failed entry stays unseen and is retried next run.
                print(f"[erro] '{title}': {ex}")
                continue
            seen.add(eid)
            posted += 1

        # Nothing new and a forced run was requested: re-post the newest entry.
        if posted == 0 and FORCE_LATEST and feed.entries:
            title, payload = build_payload(
                feed.entries[0], "📢 (FORÇADO)", "Post mais recente"
            )
            try:
                deliver(payload, "force", title)
                posted = 1
            except Exception as ex:
                print(f"[erro] force_latest: {ex}")
    finally:
        # Always record what was delivered, even if something unexpected
        # interrupts the loop, so delivered entries are not posted again.
        save_state(seen)

    print(f"[done] publicados={posted}, state_size={len(seen)}")
    PY
    python rss_to_discord.py

# Saves the updated state under a key that is unique per run. Cache entries
# are immutable, so re-using one constant key would never store newer state.
- name: Save RSS state cache
  if: ${{ always() && hashFiles('.rss_state.json') != '' }}
  uses: actions/cache/save@v4
  with:
    path: .rss_state.json
    key: rss-state-v2-${{ github.run_id }}-${{ github.run_attempt }}

0 commit comments

Comments
 (0)