Skip to content

Unable to Bypass Cloudflare Captcha #276

@hazhayder

Description

@hazhayder

I have been trying to scrape Upwork, but I am unable to bypass the Cloudflare captcha.

Image
from botasaurus.browser import browser, Driver
from selenium.webdriver.common.by import By
import re

@browser(
    wait_for_complete_page_load=True,
    headless=False
)
def scrape_heading_task(driver: Driver, data):
    """Load a page and collect its LinkedIn links and any e-mail addresses.

    Args:
        driver: Browser driver; the caller does not pass it, so it is
            presumably injected by the ``@browser`` decorator.
        data: Address handed to ``driver.get`` (a page URL in the
            invocation shown in this file).

    Returns:
        A dict with two keys: ``"linkedin_links"`` holds every link on the
        page that contains ``"linkedin.com"``; ``"emails"`` holds every
        e-mail-like substring found inside any of the page's links.
    """
    # NOTE(review): the positional ``True`` is presumably botasaurus'
    # Cloudflare-bypass flag -- confirm against the Driver.get signature.
    driver.get(data, True)
    page_links = driver.get_all_links()

    linkedin_links = []
    for href in page_links:
        if "linkedin.com" in href:
            linkedin_links.append(href)

    # E-mails are searched for in the link targets only, not in page text.
    return {
        "linkedin_links": linkedin_links,
        "emails": extract_emails(page_links)
    }


def extract_emails(strings):
    """Return every e-mail-like substring found in ``strings``.

    Args:
        strings: Iterable of text fragments to scan.

    Returns:
        A flat list of matches in encounter order: input order first, then
        left-to-right within each fragment. Duplicates are kept.
    """
    matcher = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
    return [
        address
        for fragment in strings
        for address in matcher.findall(fragment)
    ]


# Run the scraper against a single Upwork job-search URL (filters and the
# search query are encoded in the query string). The URL is passed inside a
# one-element list.
# NOTE(review): presumably the @browser decorator runs the task once per list
# item and supplies the driver itself -- confirm against the botasaurus docs.
scrape_heading_task(["https://www.upwork.com/nx/search/jobs/?amount=1000-4999,5000-&client_hires=1-9,10-&hourly_rate=15-&nbs=1&payment_verified=1&q=NOT%20%28Wordpress,%20OR%20Drupal,%20OR%20Joomla,%20OR%20Typo3,%20OR%20Shopify,%20OR%20Shopify%20OR%20Templates,%20OR%20Shopify%20OR%20Theme,%20OR%20Shopify%20OR%20Apps,%20OR%20Shopify%20OR%20Development,%20OR%20Shopify%20OR%20Website%20OR%20Redesign%29&sort=recency&subcategory2_uid=531770282589057029,531770282584862733&t=0,1"])

Please note that the code, and what I am trying to do with it, may not make much sense; I just wanted to test the library's capabilities and verify its claims.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions