# Posts new items from the Hugo RSS feed to a Discord webhook, remembering
# which entries were already announced between runs.
name: "RSS → Discord (Hugo, com estado)"

on:
  # Run when site content changes ...
  push:
    paths:
      - "content/**"
  # ... and poll every 15 minutes (cron schedules are evaluated in UTC).
  schedule:
    - cron: "*/15 * * * *"
  # Manual runs can override the feed URL or force a repost.
  workflow_dispatch:
    inputs:
      force_latest:
        description: "Repostar o item mais recente (ignora estado)?"
        required: false
        default: "false"
      rss_url:
        description: "Override do feed (ex.: https://cvehunters.com/index.xml)"
        required: false
        default: ""

jobs:
  run:
    runs-on: ubuntu-latest
    # The push and schedule triggers can fire close together. Serialize runs
    # so two jobs never read the same state and announce the same entry twice.
    concurrency:
      group: rss-to-discord-state
      cancel-in-progress: false
    steps:
      - uses: actions/checkout@v4

      # Restore the list of entry IDs that were already posted.
      # Caches are immutable once written, so a single fixed key would keep
      # returning the first state ever saved. The primary key below is unique
      # per run and is not expected to hit; the restore-keys prefixes select
      # the most recently created state cache instead. This step only
      # restores; persisting the updated file is left to the
      # "Save RSS state cache" step at the end of the job.
      - name: Restore RSS state cache
        id: cache
        uses: actions/cache/restore@v4
        with:
          path: .rss_state.json
          key: rss-state-v1-${{ github.run_id }}-${{ github.run_attempt }}
          restore-keys: |
            rss-state-v1-
            rss-state-v1
1733
1834 - name : Setup Python
1935 uses : actions/setup-python@v5
@@ -23,62 +39,110 @@ jobs:
# Third-party packages imported by the publishing script:
# feedparser (RSS/Atom parsing) and requests (HTTP client).
- name: Install deps
  run: >-
    pip install
    feedparser
    requests
2541
# Writes the publishing script to disk and runs it. All inputs reach the
# script through environment variables, never through text interpolated into
# the script body, so a manually supplied feed URL cannot inject code.
- name: Publish to Discord
  env:
    DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
    HUGO_RSS_DEFAULT: "https://cvehunters.com/index.xml"
    RSS_OVERRIDE: ${{ github.event.inputs.rss_url }}
    FORCE_LATEST: ${{ github.event.inputs.force_latest }}
  run: |
    cat > rss_to_discord.py << 'PY'
    import os, json, re, time, requests, feedparser
    from html import unescape
    from pathlib import Path

    # --- Configuration (everything arrives via the step's env block) ---
    # Inputs are stripped so a whitespace-only override does not shadow the
    # default feed URL or defeat the "true" comparison.
    WEBHOOK = (os.environ.get("DISCORD_WEBHOOK") or "").strip()
    RSS_URL = (
        (os.environ.get("RSS_OVERRIDE") or "").strip()
        or (os.environ.get("HUGO_RSS_DEFAULT") or "").strip()
    )
    FORCE_LATEST = (os.environ.get("FORCE_LATEST") or "false").strip().lower() == "true"
    STATE_FILE = Path(".rss_state.json")

    TITLE_MAX = 256      # Discord rejects embed titles longer than this
    SUMMARY_MAX = 250    # length of the summary excerpt shown in the embed
    MAX_ATTEMPTS = 3     # tries per message when Discord rate-limits us

    # Fail fast on misconfiguration instead of logging one error per entry.
    if not WEBHOOK:
        raise SystemExit("[erro] DISCORD_WEBHOOK não definido")
    if not RSS_URL:
        raise SystemExit("[erro] URL do feed vazia")

    def load_state():
        """Return the set of entry IDs already posted (empty if none usable)."""
        if not STATE_FILE.exists():
            return set()
        try:
            return set(json.loads(STATE_FILE.read_text()))
        except (OSError, ValueError, TypeError) as ex:
            # An unreadable state means every feed entry looks new; say so.
            print(f"[warn] estado ilegível ({ex}); recomeçando vazio")
            return set()

    def save_state(s):
        """Persist the posted IDs as a sorted JSON list."""
        STATE_FILE.write_text(json.dumps(sorted(s)))

    def entry_id(e):
        """Stable identifier for a feed entry: id, then guid, then link."""
        return e.get("id") or e.get("guid") or e.get("link")

    tagstrip_re = re.compile(r"<[^>]+>")
    def clean_html(s):
        """Strip tags, collapse whitespace and decode HTML entities."""
        s = tagstrip_re.sub("", s or "")
        s = re.sub(r"\s+"," ", s).strip()
        return unescape(s)

    def build_payload(e, prefix, fallback_title):
        """Return (title, webhook payload) for one feed entry."""
        title = (e.get("title") or fallback_title).strip()[:TITLE_MAX]
        url = e.get("link","")
        raw = (e.get("summary_detail") or {}).get("value") or e.get("summary") or ""
        summary = clean_html(raw)[:SUMMARY_MAX]
        payload = {
            "content": f"{prefix} **{title}**\n{url}",
            "embeds": [{"title": title, "url": url, "description": summary}],
            # Never let feed text ping @everyone, roles or users.
            "allowed_mentions": {"parse": []}
        }
        return title, payload

    def post_webhook(payload):
        """POST to the webhook, waiting and retrying on HTTP 429."""
        for attempt in range(1, MAX_ATTEMPTS + 1):
            r = requests.post(WEBHOOK, json=payload, timeout=20)
            if r.status_code != 429 or attempt == MAX_ATTEMPTS:
                return r
            try:
                delay = float(r.headers.get("Retry-After", "1"))
            except ValueError:
                delay = 1.0
            # Bound the wait so a bad header cannot stall the job.
            time.sleep(min(max(delay, 0.0), 30.0))

    seen = load_state()
    feed = feedparser.parse(RSS_URL)
    print(f"[info] feed='{RSS_URL}', total entries={len(feed.entries)}, seen={len(seen)}")
    if feed.get("bozo") and not feed.entries:
        print(f"[warn] feed inválido ou inacessível: {feed.get('bozo_exception')}")

    # Collect unseen entries, skipping IDs repeated within the same feed.
    to_post = []
    queued = set()
    for e in feed.entries:
        eid = entry_id(e)
        if not eid or eid in seen or eid in queued:
            continue
        queued.add(eid)
        to_post.append((eid, e))

    # Feeds list newest first; post oldest first so Discord reads in order.
    to_post.reverse()

    posted = 0
    for eid, e in to_post:
        title, payload = build_payload(e, "🆕", "Novo post")
        try:
            r = post_webhook(payload)
            print(f"[post] {title} -> {r.status_code}")
            r.raise_for_status()
            # Only mark as seen after Discord accepted it, so failures retry
            # on the next run.
            seen.add(eid)
            posted += 1
        except Exception as ex:
            print(f"[erro] '{title}': {ex}")

    # Nothing new but a repost was requested: publish the newest entry again.
    if posted == 0 and FORCE_LATEST and feed.entries:
        title, payload = build_payload(feed.entries[0], "📢 (FORÇADO)", "Post mais recente")
        try:
            r = post_webhook(payload)
            print(f"[force] {title} -> {r.status_code}")
            r.raise_for_status()
            posted = 1
        except Exception as ex:
            print(f"[erro] force_latest: {ex}")

    save_state(seen)
    print(f"[done] publicados={posted}, state_size={len(seen)}")
    PY
    python rss_to_discord.py

# Persist the updated state file for the next run.
# This must be the save-only action: the combined actions/cache action would
# first restore an older cache over the file the script just wrote, and then
# skip saving whenever its key already exists. Caches are immutable, so each
# run (and each re-run attempt) writes under its own key; lookups by the
# "rss-state-v1" prefix resolve to the most recently created one.
- name: Save RSS state cache
  if: always()
  uses: actions/cache/save@v4
  with:
    path: .rss_state.json
    key: rss-state-v1-${{ github.run_id }}-${{ github.run_attempt }}
0 commit comments