Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 11 additions & 26 deletions examples/markdownify/markdownify_scrapegraphai.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,20 @@
"""
Example script demonstrating the markdownify functionality
Scrape a webpage as clean markdown using scrapegraph-py v2 API.
Replaces the old markdownify() call with scrape().
"""

import json
import os

from dotenv import load_dotenv
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

def main():
# Load environment variables
load_dotenv()

# Set up logging
sgai_logger.set_logging(level="INFO")
load_dotenv()

# Initialize the client
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
if not api_key:
raise ValueError("SCRAPEGRAPH_API_KEY environment variable not found")
sgai_client = Client(api_key=api_key)
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
if not api_key:
raise ValueError("SCRAPEGRAPH_API_KEY environment variable not found")

# Example 1: Convert a website to Markdown
print("Example 1: Converting website to Markdown")
print("-" * 50)
response = sgai_client.markdownify(
website_url="https://example.com"
)
print("Markdown output:")
print(response["result"]) # Access the result key from the dictionary
print("\nMetadata:")
print(response.get("metadata", {})) # Use get() with default value
print("\n" + "=" * 50 + "\n")
if __name__ == "__main__":
main()
with Client(api_key=api_key) as client:
response = client.scrape(url="https://example.com")
print(json.dumps(response, indent=2))
85 changes: 11 additions & 74 deletions examples/search_graph/scrapegraphai/searchscraper_scrapegraphai.py
Original file line number Diff line number Diff line change
@@ -1,83 +1,20 @@
"""
Example implementation of search-based scraping using Scrapegraph AI.
This example demonstrates how to use the searchscraper to extract information from the web.
Search the web and extract AI-structured results using scrapegraph-py v2 API.
Replaces the old searchscraper() call with search().
"""

import json
import os
from typing import Dict, Any

from dotenv import load_dotenv
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

def format_response(response: Dict[str, Any]) -> None:
    """
    Format and print the search response in a readable way.

    The function reads three keys from ``response``: ``request_id``,
    ``result`` and ``reference_urls``. All three are looked up tolerantly,
    so a missing or null entry is printed as ``None`` (or counted as zero
    sources) instead of aborting the report half-way through.

    Args:
        response (Dict[str, Any]): The response from the search API
    """
    banner = "=" * 50
    divider = "-" * 30

    print("\n" + banner)
    print("SEARCH RESULTS")
    print(banner)

    # Print request ID
    print(f"\nRequest ID: {response.get('request_id')}")

    # Print number of sources.
    # `or []` also covers the key being present with a null value, which a
    # plain .get(key, []) default would pass through as None.
    urls = response.get('reference_urls') or []
    print(f"\nSources Processed: {len(urls)}")

    # Print the extracted information
    print("\nExtracted Information:")
    print(divider)
    result = response.get('result')
    if isinstance(result, dict):
        for key, value in result.items():
            print(f"\n{key.upper()}:")
            if isinstance(value, list):
                # One bullet per list entry
                for item in value:
                    print(f" • {item}")
            else:
                print(f" {value}")
    else:
        # Non-dict results (plain text, None, ...) are printed as-is
        print(result)

    # Print source URLs
    if urls:
        print("\nSources:")
        print(divider)
        for i, url in enumerate(urls, 1):
            print(f"{i}. {url}")
    print("\n" + banner)

def main():
# Load environment variables
load_dotenv()

# Get API key
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
if not api_key:
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")

# Configure logging
sgai_logger.set_logging(level="INFO")

# Initialize client
sgai_client = Client(api_key=api_key)

try:
# Basic search scraper example
print("\nSearching for information...")

search_response = sgai_client.searchscraper(
user_prompt="Extract webpage information"
)
format_response(search_response)
load_dotenv()

except Exception as e:
print(f"\nError occurred: {str(e)}")
finally:
# Always close the client
sgai_client.close()
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
if not api_key:
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")

if __name__ == "__main__":
main()
with Client(api_key=api_key) as client:
response = client.search(query="Extract webpage information")
print(json.dumps(response, indent=2))
Original file line number Diff line number Diff line change
@@ -1,45 +1,23 @@
"""
Example implementation using scrapegraph-py client directly.
Extract structured data from a webpage using scrapegraph-py v2 API.
Replaces the old smartscraper() call with extract().
"""

import json
import os

from dotenv import load_dotenv
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

def main():
# Load environment variables from .env file
load_dotenv()

# Get API key from environment variables
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
if not api_key:
raise ValueError("SCRAPEGRAPH_API_KEY non trovato nelle variabili d'ambiente")

# Set up logging
sgai_logger.set_logging(level="INFO")

# Initialize the client with API key from environment
sgai_client = Client(api_key=api_key)

try:
# SmartScraper request
response = sgai_client.smartscraper(
website_url="https://scrapegraphai.com",
user_prompt="Extract the founders' informations"
)

# Print the response
print(f"Request ID: {response['request_id']}")
print(f"Result: {response['result']}")
if response.get('reference_urls'):
print(f"Reference URLs: {response['reference_urls']}")
load_dotenv()

except Exception as e:
print(f"Error occurred: {str(e)}")
finally:
# Always close the client
sgai_client.close()
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
if not api_key:
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")

if __name__ == "__main__":
main()
with Client(api_key=api_key) as client:
response = client.extract(
url="https://scrapegraphai.com",
prompt="Extract the founders' informations",
)
print(json.dumps(response, indent=2))
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"jsonschema>=4.25.1",
"duckduckgo-search>=8.1.1",
"pydantic>=2.12.5",
"scrapegraph-py>=1.44.0",
"scrapegraph-py>=2.0.0",
]

readme = "README.md"
Expand Down
30 changes: 11 additions & 19 deletions scrapegraphai/graphs/smart_scraper_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,31 +79,23 @@ def _create_graph(self) -> BaseGraph:
if self.llm_model == "scrapegraphai/smart-scraper":
try:
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger
except ImportError:
raise ImportError(
"scrapegraph_py is not installed. Please install it using 'pip install scrapegraph-py'."
)

sgai_logger.set_logging(level="INFO")

# Initialize the client with explicit API key
sgai_client = Client(api_key=self.config.get("api_key"))

# SmartScraper request
response = sgai_client.smartscraper(
website_url=self.source,
user_prompt=self.prompt,
)

# Use logging instead of print for better production practices
if "request_id" in response and "result" in response:
logger.info(f"Request ID: {response['request_id']}")
logger.info(f"Result: {response['result']}")
else:
logger.warning("Missing expected keys in response.")
with Client(api_key=self.config.get("api_key")) as sgai_client:
# Extract request (v2 API)
response = sgai_client.extract(
url=self.source,
prompt=self.prompt,
output_schema=self.schema,
)

sgai_client.close()
if "id" in response:
logger.info(f"Request ID: {response['id']}")
if "data" in response:
logger.info(f"Result: {response['data']}")

return response

Expand Down
Loading