@@ -19,7 +19,7 @@ async function getGenAIClient() {
19
19
20
20
/**
21
21
* Scrapes URL content using Gemini's urlContext tool
22
- *
22
+ *
23
23
* @param {string[]} urls - Array of URLs to scrape
24
24
* @returns {Promise<Array<{url: string, canonical?: string, title?: string, summary?: string, topImageUrl?: string, html?: string, status: string, error?: string}>>}
25
25
*/
@@ -33,7 +33,7 @@ export default async function scrapeUrlsWithGemini(urls) {
33
33
try {
34
34
// Process all URLs in a single LLM call for efficiency
35
35
const urlList = urls . map ( ( url , index ) => `${ index + 1 } . ${ url } ` ) . join ( '\n' ) ;
36
-
36
+
37
37
const generateContentArgs = {
38
38
model : 'gemini-2.5-flash' ,
39
39
contents : [
@@ -70,21 +70,24 @@ Requirements:
70
70
] ,
71
71
config : {
72
72
tools : [ { urlContext : { } } ] ,
73
- systemInstruction : 'You are a web content analyzer that extracts structured information from web pages for fact-checking purposes.' ,
73
+ systemInstruction :
74
+ 'You are a web content analyzer that extracts structured information from web pages for fact-checking purposes.' ,
74
75
responseModalities : [ 'TEXT' ] ,
75
76
temperature : 0.1 , // Low temperature for consistent extraction
76
77
maxOutputTokens : 4096 ,
77
78
} ,
78
79
} ;
79
80
80
- const response = await genAIClient . models . generateContent ( generateContentArgs ) ;
81
-
81
+ const response = await genAIClient . models . generateContent (
82
+ generateContentArgs
83
+ ) ;
84
+
82
85
if ( ! response . candidates || ! response . candidates [ 0 ] ) {
83
86
throw new Error ( 'No response candidates received' ) ;
84
87
}
85
88
86
89
const responseText = response . candidates [ 0 ] . content . parts [ 0 ] . text ;
87
-
90
+
88
91
// Parse the JSON response
89
92
let extractedDataArray ;
90
93
try {
@@ -96,9 +99,12 @@ Requirements:
96
99
extractedDataArray = JSON . parse ( responseText ) ;
97
100
}
98
101
} catch ( parseError ) {
99
- console . warn ( '[geminiUrlScraper] Failed to parse JSON response:' , responseText ) ;
102
+ console . warn (
103
+ '[geminiUrlScraper] Failed to parse JSON response:' ,
104
+ responseText
105
+ ) ;
100
106
// Fallback: create error results for all URLs
101
- return urls . map ( url => ( {
107
+ return urls . map ( ( url ) => ( {
102
108
url,
103
109
canonical : url ,
104
110
title : null ,
@@ -111,8 +117,9 @@ Requirements:
111
117
}
112
118
113
119
// Ensure we have results for all input URLs
114
- const results = urls . map ( url => {
115
- const extracted = extractedDataArray . find ( item => item . url === url ) || { } ;
120
+ const results = urls . map ( ( url ) => {
121
+ const extracted =
122
+ extractedDataArray . find ( ( item ) => item . url === url ) || { } ;
116
123
return {
117
124
url,
118
125
canonical : extracted . canonical || url ,
@@ -125,17 +132,16 @@ Requirements:
125
132
} ) ;
126
133
127
134
return results ;
128
-
129
135
} catch ( error ) {
130
136
console . error ( '[geminiUrlScraper] Error processing URLs:' , error ) ;
131
-
137
+
132
138
rollbar . error ( 'Gemini URL scraping error' , {
133
139
urls,
134
140
error : error . message ,
135
141
} ) ;
136
142
137
143
// Return error results for all URLs
138
- return urls . map ( url => ( {
144
+ return urls . map ( ( url ) => ( {
139
145
url,
140
146
canonical : url ,
141
147
title : null ,
@@ -146,4 +152,4 @@ Requirements:
146
152
error : error . message ,
147
153
} ) ) ;
148
154
}
149
- }
155
+ }
0 commit comments