From 0c916220e4d7c612b022516d7fac6d918bbec657 Mon Sep 17 00:00:00 2001 From: Jennysson Junior Date: Wed, 6 Aug 2025 00:01:43 -0300 Subject: [PATCH] feat: :memo: add internalizion with i18n provide a brazilian portuguese version of guide and configure to have any language translation --- docusaurus.config.js | 17 +- i18n/en/code.json | 309 ++++++++++++++++ .../options.json | 14 + .../current.json | 54 +++ .../current/10-key-concepts/1-what-is-rag.mdx | 5 + .../10-key-concepts/2-rag-usecases.mdx | 28 ++ .../10-key-concepts/3-components-of-rag.mdx | 17 + .../current/10-key-concepts/_category_.json | 8 + .../current/20-dev-env/1-dev-env-setup.mdx | 196 ++++++++++ .../current/20-dev-env/2-setup-pre-reqs.mdx | 26 ++ .../current/20-dev-env/_category_.json | 8 + .../30-prepare-the-data/1-load-data.mdx | 5 + .../30-prepare-the-data/2-chunk-data.mdx | 29 ++ .../30-prepare-the-data/3-embed-data.mdx | 33 ++ .../30-prepare-the-data/4-ingest-data.mdx | 34 ++ .../30-prepare-the-data/_category_.json | 8 + .../1-lecture-notes.mdx | 5 + .../2-create-vector-index.mdx | 20 + .../3-vector-search.mdx | 57 +++ .../4-pre-filtering.mdx | 126 +++++++ .../40-perform-vector-search/_category_.json | 8 + .../50-build-rag-app/1-build-rag-app.mdx | 40 ++ .../50-build-rag-app/2-add-reranking.mdx | 20 + .../current/50-build-rag-app/_category_.json | 8 + .../current/60-add-memory/1-add-memory.mdx | 74 ++++ .../current/60-add-memory/_category_.json | 8 + .../current/intro.mdx | 26 ++ .../current/summary.mdx | 17 + i18n/en/docusaurus-theme-classic/footer.json | 26 ++ i18n/en/docusaurus-theme-classic/navbar.json | 10 + i18n/pt-BR/code.json | 345 ++++++++++++++++++ .../current.json | 54 +++ .../current/10-key-concepts/1-what-is-rag.mdx | 5 + .../10-key-concepts/2-rag-usecases.mdx | 28 ++ .../10-key-concepts/3-components-of-rag.mdx | 17 + .../current/10-key-concepts/_category_.json | 8 + .../current/20-dev-env/1-dev-env-setup.mdx | 196 ++++++++++ .../current/20-dev-env/2-setup-pre-reqs.mdx | 26 ++ 
.../current/20-dev-env/_category_.json | 8 + .../30-prepare-the-data/1-load-data.mdx | 5 + .../30-prepare-the-data/2-chunk-data.mdx | 29 ++ .../30-prepare-the-data/3-embed-data.mdx | 33 ++ .../30-prepare-the-data/4-ingest-data.mdx | 34 ++ .../30-prepare-the-data/_category_.json | 8 + .../1-lecture-notes.mdx | 5 + .../2-create-vector-index.mdx | 20 + .../3-vector-search.mdx | 57 +++ .../4-pre-filtering.mdx | 126 +++++++ .../40-perform-vector-search/_category_.json | 8 + .../50-build-rag-app/1-build-rag-app.mdx | 40 ++ .../50-build-rag-app/2-add-reranking.mdx | 20 + .../current/50-build-rag-app/_category_.json | 8 + .../current/60-add-memory/1-add-memory.mdx | 74 ++++ .../current/60-add-memory/_category_.json | 8 + .../current/intro.mdx | 26 ++ .../current/summary.mdx | 17 + .../docusaurus-theme-classic/footer.json | 26 ++ .../docusaurus-theme-classic/navbar.json | 10 + src/components/HomepageFeatures/index.js | 24 +- src/pages/index.js | 11 +- 60 files changed, 2505 insertions(+), 7 deletions(-) create mode 100644 i18n/en/code.json create mode 100644 i18n/en/docusaurus-plugin-content-blog/options.json create mode 100644 i18n/en/docusaurus-plugin-content-docs/current.json create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/1-what-is-rag.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/2-rag-usecases.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/3-components-of-rag.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/_category_.json create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/1-dev-env-setup.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/2-setup-pre-reqs.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/_category_.json create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/1-load-data.mdx create mode 
100644 i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/2-chunk-data.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/3-embed-data.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/4-ingest-data.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/_category_.json create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/1-lecture-notes.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/2-create-vector-index.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/3-vector-search.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/4-pre-filtering.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/_category_.json create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/1-build-rag-app.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/2-add-reranking.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/_category_.json create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/60-add-memory/1-add-memory.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/60-add-memory/_category_.json create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/intro.mdx create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/summary.mdx create mode 100644 i18n/en/docusaurus-theme-classic/footer.json create mode 100644 i18n/en/docusaurus-theme-classic/navbar.json create mode 100644 i18n/pt-BR/code.json create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current.json create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/1-what-is-rag.mdx create mode 100644 
i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/2-rag-usecases.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/3-components-of-rag.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/_category_.json create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/1-dev-env-setup.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/2-setup-pre-reqs.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/_category_.json create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/1-load-data.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/2-chunk-data.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/3-embed-data.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/4-ingest-data.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/_category_.json create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/1-lecture-notes.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/2-create-vector-index.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/3-vector-search.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/4-pre-filtering.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/_category_.json create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/1-build-rag-app.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/2-add-reranking.mdx create mode 100644 
i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/_category_.json create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/60-add-memory/1-add-memory.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/60-add-memory/_category_.json create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/intro.mdx create mode 100644 i18n/pt-BR/docusaurus-plugin-content-docs/current/summary.mdx create mode 100644 i18n/pt-BR/docusaurus-theme-classic/footer.json create mode 100644 i18n/pt-BR/docusaurus-theme-classic/navbar.json diff --git a/docusaurus.config.js b/docusaurus.config.js index 0d1061a0..5b041089 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -118,6 +118,18 @@ const config = { ], ], plugins: [require.resolve("docusaurus-lunr-search")], + i18n: { + defaultLocale: 'en', + locales: ['en', 'pt-BR'], + localeConfigs: { + en: { + label: 'English', + }, + 'pt-BR': { + label: 'Português (Brasil)', + }, + }, + }, themeConfig: /** @type {import('@docusaurus/preset-classic').ThemeConfig} */ ({ @@ -138,7 +150,10 @@ const config = { height: "100%", }, items: [ - + { + "type": "localeDropdown", + "position": "right", + } ], }, footer: { diff --git a/i18n/en/code.json b/i18n/en/code.json new file mode 100644 index 00000000..9afabef1 --- /dev/null +++ b/i18n/en/code.json @@ -0,0 +1,309 @@ +{ + "theme.ErrorPageContent.title": { + "message": "This page crashed.", + "description": "The title of the fallback page when the page crashed" + }, + "theme.BackToTopButton.buttonAriaLabel": { + "message": "Scroll back to top", + "description": "The ARIA label for the back to top button" + }, + "theme.blog.archive.title": { + "message": "Archive", + "description": "The page & hero title of the blog archive page" + }, + "theme.blog.archive.description": { + "message": "Archive", + "description": "The page & hero description of the blog archive page" + }, + "theme.blog.paginator.navAriaLabel": { + "message": "Blog list page 
navigation", + "description": "The ARIA label for the blog pagination" + }, + "theme.blog.paginator.newerEntries": { + "message": "Newer entries", + "description": "The label used to navigate to the newer blog posts page (previous page)" + }, + "theme.blog.paginator.olderEntries": { + "message": "Older entries", + "description": "The label used to navigate to the older blog posts page (next page)" + }, + "theme.tags.tagsPageLink": { + "message": "View all tags", + "description": "The label of the link targeting the tag list page" + }, + "theme.docs.breadcrumbs.navAriaLabel": { + "message": "Breadcrumbs", + "description": "The ARIA label for the breadcrumbs" + }, + "theme.colorToggle.ariaLabel": { + "message": "Switch between dark and light mode (currently {mode})", + "description": "The ARIA label for the navbar color mode toggle" + }, + "theme.colorToggle.ariaLabel.mode.dark": { + "message": "dark mode", + "description": "The name for the dark color mode" + }, + "theme.colorToggle.ariaLabel.mode.light": { + "message": "light mode", + "description": "The name for the light color mode" + }, + "theme.blog.post.paginator.navAriaLabel": { + "message": "Blog post page navigation", + "description": "The ARIA label for the blog posts pagination" + }, + "theme.blog.post.paginator.newerPost": { + "message": "Newer post", + "description": "The blog post button label to navigate to the newer/previous post" + }, + "theme.blog.post.paginator.olderPost": { + "message": "Older post", + "description": "The blog post button label to navigate to the older/next post" + }, + "theme.docs.DocCard.categoryDescription.plurals": { + "message": "1 item|{count} items", + "description": "The default description for a category card in the generated index about how many items this category includes" + }, + "theme.docs.paginator.navAriaLabel": { + "message": "Docs pages", + "description": "The ARIA label for the docs pagination" + }, + "theme.docs.paginator.previous": { + "message": "Previous", 
+ "description": "The label used to navigate to the previous doc" + }, + "theme.docs.paginator.next": { + "message": "Next", + "description": "The label used to navigate to the next doc" + }, + "theme.docs.tagDocListPageTitle.nDocsTagged": { + "message": "One doc tagged|{count} docs tagged", + "description": "Pluralized label for \"{count} docs tagged\". Use as much plural forms (separated by \"|\") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)" + }, + "theme.docs.tagDocListPageTitle": { + "message": "{nDocsTagged} with \"{tagName}\"", + "description": "The title of the page for a docs tag" + }, + "theme.docs.versions.unreleasedVersionLabel": { + "message": "This is unreleased documentation for {siteTitle} {versionLabel} version.", + "description": "The label used to tell the user that he's browsing an unreleased doc version" + }, + "theme.docs.versions.unmaintainedVersionLabel": { + "message": "This is documentation for {siteTitle} {versionLabel}, which is no longer actively maintained.", + "description": "The label used to tell the user that he's browsing an unmaintained doc version" + }, + "theme.docs.versions.latestVersionSuggestionLabel": { + "message": "For up-to-date documentation, see the {latestVersionLink} ({versionLabel}).", + "description": "The label used to tell the user to check the latest version" + }, + "theme.docs.versions.latestVersionLinkLabel": { + "message": "latest version", + "description": "The label used for the latest version suggestion link label" + }, + "theme.docs.versionBadge.label": { + "message": "Version: {versionLabel}" + }, + "theme.common.editThisPage": { + "message": "Edit this page", + "description": "The link label to edit the current page" + }, + "theme.lastUpdated.atDate": { + "message": " on {date}", + "description": "The words used to describe on which date a page has been last updated" + }, + "theme.lastUpdated.byUser": { + "message": " by {user}", 
+ "description": "The words used to describe by who the page has been last updated" + }, + "theme.lastUpdated.lastUpdatedAtBy": { + "message": "Last updated{atDate}{byUser}", + "description": "The sentence used to display when a page has been last updated, and by who" + }, + "theme.common.headingLinkTitle": { + "message": "Direct link to {heading}", + "description": "Title for link to heading" + }, + "theme.NotFound.title": { + "message": "Page Not Found", + "description": "The title of the 404 page" + }, + "theme.navbar.mobileVersionsDropdown.label": { + "message": "Versions", + "description": "The label for the navbar versions dropdown on mobile view" + }, + "theme.tags.tagsListLabel": { + "message": "Tags:", + "description": "The label alongside a tag list" + }, + "theme.AnnouncementBar.closeButtonAriaLabel": { + "message": "Close", + "description": "The ARIA label for close button of announcement bar" + }, + "theme.admonition.caution": { + "message": "caution", + "description": "The default label used for the Caution admonition (:::caution)" + }, + "theme.admonition.danger": { + "message": "danger", + "description": "The default label used for the Danger admonition (:::danger)" + }, + "theme.admonition.info": { + "message": "info", + "description": "The default label used for the Info admonition (:::info)" + }, + "theme.admonition.note": { + "message": "note", + "description": "The default label used for the Note admonition (:::note)" + }, + "theme.admonition.tip": { + "message": "tip", + "description": "The default label used for the Tip admonition (:::tip)" + }, + "theme.admonition.warning": { + "message": "warning", + "description": "The default label used for the Warning admonition (:::warning)" + }, + "theme.blog.sidebar.navAriaLabel": { + "message": "Blog recent posts navigation", + "description": "The ARIA label for recent posts in the blog sidebar" + }, + "theme.CodeBlock.copied": { + "message": "Copied", + "description": "The copied button label on 
code blocks" + }, + "theme.CodeBlock.copyButtonAriaLabel": { + "message": "Copy code to clipboard", + "description": "The ARIA label for copy code blocks button" + }, + "theme.CodeBlock.copy": { + "message": "Copy", + "description": "The copy button label on code blocks" + }, + "theme.CodeBlock.wordWrapToggle": { + "message": "Toggle word wrap", + "description": "The title attribute for toggle word wrapping button of code block lines" + }, + "theme.DocSidebarItem.expandCategoryAriaLabel": { + "message": "Expand sidebar category '{label}'", + "description": "The ARIA label to expand the sidebar category" + }, + "theme.DocSidebarItem.collapseCategoryAriaLabel": { + "message": "Collapse sidebar category '{label}'", + "description": "The ARIA label to collapse the sidebar category" + }, + "theme.NotFound.p1": { + "message": "We could not find what you were looking for.", + "description": "The first paragraph of the 404 page" + }, + "theme.NotFound.p2": { + "message": "Please contact the owner of the site that linked you to the original URL and let them know their link is broken.", + "description": "The 2nd paragraph of the 404 page" + }, + "theme.NavBar.navAriaLabel": { + "message": "Main", + "description": "The ARIA label for the main navigation" + }, + "theme.blog.post.readMore": { + "message": "Read more", + "description": "The label used in blog post item excerpts to link to full blog posts" + }, + "theme.blog.post.readMoreLabel": { + "message": "Read more about {title}", + "description": "The ARIA label for the link to full blog posts from excerpts" + }, + "theme.navbar.mobileLanguageDropdown.label": { + "message": "Languages", + "description": "The label for the mobile language switcher dropdown" + }, + "theme.TOCCollapsible.toggleButtonLabel": { + "message": "On this page", + "description": "The label used by the button on the collapsible TOC component" + }, + "theme.blog.post.readingTime.plurals": { + "message": "One min read|{readingTime} min read", + 
"description": "Pluralized label for \"{readingTime} min read\". Use as much plural forms (separated by \"|\") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)" + }, + "theme.docs.breadcrumbs.home": { + "message": "Home page", + "description": "The ARIA label for the home page in the breadcrumbs" + }, + "theme.docs.sidebar.collapseButtonTitle": { + "message": "Collapse sidebar", + "description": "The title attribute for collapse button of doc sidebar" + }, + "theme.docs.sidebar.collapseButtonAriaLabel": { + "message": "Collapse sidebar", + "description": "The title attribute for collapse button of doc sidebar" + }, + "theme.docs.sidebar.navAriaLabel": { + "message": "Docs sidebar", + "description": "The ARIA label for the sidebar navigation" + }, + "theme.docs.sidebar.closeSidebarButtonAriaLabel": { + "message": "Close navigation bar", + "description": "The ARIA label for close button of mobile sidebar" + }, + "theme.docs.sidebar.expandButtonTitle": { + "message": "Expand sidebar", + "description": "The ARIA label and title attribute for expand button of doc sidebar" + }, + "theme.docs.sidebar.expandButtonAriaLabel": { + "message": "Expand sidebar", + "description": "The ARIA label and title attribute for expand button of doc sidebar" + }, + "theme.navbar.mobileSidebarSecondaryMenu.backButtonLabel": { + "message": "← Back to main menu", + "description": "The label of the back button to return to main menu, inside the mobile navbar sidebar secondary menu (notably used to display the docs sidebar)" + }, + "theme.docs.sidebar.toggleSidebarButtonAriaLabel": { + "message": "Toggle navigation bar", + "description": "The ARIA label for hamburger menu button of mobile navigation" + }, + "theme.blog.post.plurals": { + "message": "One post|{count} posts", + "description": "Pluralized label for \"{count} posts\". 
Use as much plural forms (separated by \"|\") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)" + }, + "theme.blog.tagTitle": { + "message": "{nPosts} tagged with \"{tagName}\"", + "description": "The title of the page for a blog tag" + }, + "theme.blog.author.pageTitle": { + "message": "{authorName} - {nPosts}", + "description": "The title of the page for a blog author" + }, + "theme.blog.authorsList.pageTitle": { + "message": "Authors", + "description": "The title of the authors page" + }, + "theme.blog.authorsList.viewAll": { + "message": "View all authors", + "description": "The label of the link targeting the blog authors page" + }, + "theme.contentVisibility.unlistedBanner.title": { + "message": "Unlisted page", + "description": "The unlisted content banner title" + }, + "theme.contentVisibility.unlistedBanner.message": { + "message": "This page is unlisted. Search engines will not index it, and only users having a direct link can access it.", + "description": "The unlisted content banner message" + }, + "theme.contentVisibility.draftBanner.title": { + "message": "Draft page", + "description": "The draft content banner title" + }, + "theme.contentVisibility.draftBanner.message": { + "message": "This page is a draft. 
It will only be visible in dev and be excluded from the production build.", + "description": "The draft content banner message" + }, + "theme.ErrorPageContent.tryAgain": { + "message": "Try again", + "description": "The label of the button to try again rendering when the React error boundary captures an error" + }, + "theme.common.skipToMainContent": { + "message": "Skip to main content", + "description": "The skip to content label used for accessibility, allowing to rapidly navigate to main content with keyboard tab/enter navigation" + }, + "theme.tags.tagsPageTitle": { + "message": "Tags", + "description": "The title of the tag list page" + } +} diff --git a/i18n/en/docusaurus-plugin-content-blog/options.json b/i18n/en/docusaurus-plugin-content-blog/options.json new file mode 100644 index 00000000..9239ff70 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-blog/options.json @@ -0,0 +1,14 @@ +{ + "title": { + "message": "Blog", + "description": "The title for the blog used in SEO" + }, + "description": { + "message": "Blog", + "description": "The description for the blog used in SEO" + }, + "sidebar.title": { + "message": "Recent posts", + "description": "The label for the left sidebar" + } +} diff --git a/i18n/en/docusaurus-plugin-content-docs/current.json b/i18n/en/docusaurus-plugin-content-docs/current.json new file mode 100644 index 00000000..333d7c80 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current.json @@ -0,0 +1,54 @@ +{ + "version.label": { + "message": "Next", + "description": "The label for version current" + }, + "sidebar.tutorialSidebar.category.Key Concepts": { + "message": "Key Concepts", + "description": "The label for category Key Concepts in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Key Concepts.link.generated-index.description": { + "message": "Learn the basics of Retrieval Augmented Generation a.k.a. 
RAG", + "description": "The generated-index page description for category Key Concepts in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Dev Environment": { + "message": "Dev Environment", + "description": "The label for category Dev Environment in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Dev Environment.link.generated-index.description": { + "message": "Setup the dev environment and prerequisites", + "description": "The generated-index page description for category Dev Environment in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Prepare the Data": { + "message": "Prepare the Data", + "description": "The label for category Prepare the Data in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Prepare the Data.link.generated-index.description": { + "message": "Download, chunk, embed, and ingest the data", + "description": "The generated-index page description for category Prepare the Data in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Perform Vector Search on Your Data": { + "message": "Perform Vector Search on Your Data", + "description": "The label for category Perform Vector Search on Your Data in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Perform Vector Search on Your Data.link.generated-index.description": { + "message": "Perform vector search queries using MongoDB Atlas Vector Search", + "description": "The generated-index page description for category Perform Vector Search on Your Data in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Build the RAG Application": { + "message": "Build the RAG Application", + "description": "The label for category Build the RAG Application in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Build the RAG Application.link.generated-index.description": { + "message": "Build the RAG application", + "description": "The generated-index page description for category Build the 
RAG Application in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Add memory to the RAG application": { + "message": "Add memory to the RAG application", + "description": "The label for category Add memory to the RAG application in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Add memory to the RAG application.link.generated-index.description": { + "message": "Incorporate chat history into the RAG application", + "description": "The generated-index page description for category Add memory to the RAG application in sidebar tutorialSidebar" + } +} diff --git a/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/1-what-is-rag.mdx b/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/1-what-is-rag.mdx new file mode 100644 index 00000000..2df9a421 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/1-what-is-rag.mdx @@ -0,0 +1,5 @@ +# 📘 What is RAG? + +![](/img/screenshots/10-key-concepts/rag.png) + +RAG, short for Retrieval Augmented Generation, is a technique to enhance the quality of responses generated by a large language model (LLM), by augmenting its pre-trained knowledge with information retrieved from external sources. This results in more accurate responses from the LLM by grounding them in real, contextually relevant data. \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/2-rag-usecases.mdx b/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/2-rag-usecases.mdx new file mode 100644 index 00000000..75fecc3a --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/2-rag-usecases.mdx @@ -0,0 +1,28 @@ +# 📘 When to use RAG? + +RAG is best suited for the following: +* Tasks that require very specific information that you don’t think will be present in the LLM's parametric knowledge i.e.
information that is not widely available on the internet +* Tasks that require information from multiple different data sources +* Tasks that involve basic question-answering or summarization on a piece of information + +Do not expect success on complex multi-step tasks involving deductive reasoning or long-term planning. These are more suited for agentic workflows. + +Here are some examples of tasks/questions that **DO NOT** require or cannot be achieved with RAG: + +> Who was the first president of the United States? + +The information required to answer this question is very likely present in the parametric knowledge of most LLMs. Hence, this question can be answered using a simple prompt to an LLM. + +> How has the trend in the average daily calorie intake among adults changed over the last decade in the United States, and what impact might this have on obesity rates? Additionally, can you provide a graphical representation of the trend in obesity rates over this period? + +This question involves multiple sub-tasks such as data aggregation, visualization, and reasoning. Hence, this is a good use case for an AI agent rather than RAG. + +Here are some use cases for RAG: + +> What is the travel reimbursement policy for meals for my company? + +The information required to answer this question is most likely not present in the parametric knowledge of available LLMs. However, this question can easily be answered using RAG on a knowledge base consisting of your company's data. + +> Hi, I'm having trouble installing your software on my Windows 10 computer. It keeps giving me an error message saying 'Installation failed: Error code 1234'. How can I resolve this issue? + +Again, this question requires troubleshooting information for a specific software, the documentation for which might not be widely available, but can be solved using RAG. 
\ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/3-components-of-rag.mdx b/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/3-components-of-rag.mdx new file mode 100644 index 00000000..d7c21246 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/3-components-of-rag.mdx @@ -0,0 +1,17 @@ +# 📘 Components of a RAG system + +RAG systems have two main components: **Retrieval** and **Generation**. + +## Retrieval + +Retrieval mainly involves processing your data and constructing a knowledge base in a way that you are able to efficiently retrieve relevant information from it. It typically involves three main steps: + +* **Chunking**: Break down large pieces of information into smaller segments or chunks. + +* **Embedding**: Convert a piece of information such as text, images, audio, video, etc. into an array of numbers a.k.a. vectors. + +* **Vector Search**: Retrieve the most relevant documents from the knowledge base based on embedding similarity with the query vector. + +## Generation + +Generation involves crafting a prompt that contains all the instructions and information required by the LLM to generate accurate answers to user queries. \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/_category_.json b/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/_category_.json new file mode 100644 index 00000000..6cce0127 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/10-key-concepts/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Key Concepts", + "position": 10, + "link": { + "type": "generated-index", + "description": "Learn the basics of Retrieval Augmented Generation a.k.a. 
RAG" + } +} \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/1-dev-env-setup.mdx b/i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/1-dev-env-setup.mdx new file mode 100644 index 00000000..05d6fc98 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/1-dev-env-setup.mdx @@ -0,0 +1,196 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import Screenshot from "@site/src/components/Screenshot"; + +# 👐 Setup dev environment + +
+ 🦹 If you are doing this lab as part of a MongoDB GenAI Developer Day, you can continue in the environment you previously created. + + + + + Navigate to the Instruqt lab using [this](http://mdb.link/instruqt-ai) link. Click **Continue** to continue in the sandbox you created previously. + + + + In the Explorer menu, navigate to `genai-devday-notebooks` > `notebooks` > `ai-rag-lab.ipynb`. This is the Jupyter Notebook you will be using throughout this lab. + + + + :::tip + Notice that this documentation website is also linked in the `RAG Lab Instructions` tab of your Instruqt sandbox. Feel free to access the documentation from there instead for the rest of the lab. + ::: + + + + + Navigate to [this](https://github.com/codespaces/new/mongodb-developer/genai-devday-notebooks?quickstart=1) link. Click the **Resume this codespace** button to resume the codespace you created previously. + + + + Give the codespace a few seconds to restart. When files appear in the Explorer tab, click on the file named `ai-rag-lab.ipynb` under `notebooks`. This is the Jupyter Notebook you will be using throughout this lab. + + + + + + + Open the browser tab where your Jupyter Server from before is running and open the file named `ai-rag-lab.ipynb`. + + + + + +
+ +:::tip +In this lab, we will be using Jupyter Notebooks, which is an interactive Python environment. If you are new to Jupyter Notebooks, use [this](https://mongodb-developer.github.io/vector-search-lab/docs/dev-env/jupyter-notebooks) guide to familiarize yourself with the environment. +::: + + + + +Instruqt is a lab platform that provides cloud-based sandboxes which come pre-configured with all the tools you need to run this lab. + +**Navigate to the Instruqt lab using [this](http://mdb.link/instruqt-ai) link.** Fill out the form that appears and click **Submit and access**. + + + +Click **Start** to launch the lab environment. + + + +You should see a screen with a purple progress bar indicating that Instruqt is preparing a sandbox with all the required libraries for this lab and a MongoDB cluster. + +Once this is done, you should see a Start button at the bottom right of the screen. Click this to enter the lab. + + + +### Connect to the MongoDB cluster + +Let's first connect to the MongoDB cluster that was created for you. This will allow you to view data we import into the cluster later in the lab, directly from the VSCode IDE. + +To do this, click the leaf icon in the left navigation bar of the IDE. This is MongoDB's VSCode extension. + + + +Under **Connections**, click the _Local MongoDB Atlas_ connection. This should automatically establish a connection to the local MongoDB cluster running on port 27017. + + + +If the connection was successful, you should see a green leaf and a "connected" message appear around the _Local MongoDB Atlas_ connection. + +You will also see the default databases in the cluster appear under **Connections**. Any additional databases we create during the lab will also appear here. + + + +### Jupyter Notebook setup + +You will be filling code in a Jupyter Notebook during this lab, so let's get set up with that next! + +Within the sandbox, click on the files icon in the left navigation bar of the IDE. 
In the Explorer menu, navigate to `genai-devday-notebooks` > `notebooks` > `ai-rag-lab.ipynb` to open the Jupyter Notebook for this lab. + + + +Next, select the Python interpreter by clicking **Select Kernel** at the top right of the IDE. + + + +In the modal that appears, click **Python environments...** and select the interpreter that is marked as **Recommended** or **Global Env**. + + + + + +That's it! You're ready for the lab! + +:::tip +Notice that this documentation website is also linked in the `RAG Lab Instructions` tab of your Instruqt sandbox. Feel free to access the documentation from there instead for the rest of the lab. +::: + + + + + +A codespace is a cloud-hosted development environment from GitHub, that comes pre-configured with all the tools you need to run this lab. + +**To create a codespace, navigate to [this](https://github.com/codespaces/new/mongodb-developer/genai-devday-notebooks?quickstart=1) link.** You will be prompted to sign into GitHub if you haven't already. Once signed in, click the **Create new codespace** button to create a new codespace. + + + +Let it run for a few seconds as it prepares a Docker container with all the required libraries and a MongoDB cluster. + +### Connect to the MongoDB cluster + +Let's first connect to the MongoDB cluster that was created for you. This will allow you to view data we import into the cluster later in the lab, directly from the VSCode IDE. + +To do this, click the leaf icon in the left navigation bar of the IDE. This is MongoDB's VSCode extension. + + + +Under **Connections**, click the _Local MongoDB Atlas_ connection. This should automatically establish a connection to the local MongoDB cluster running on port 27017. + + + +If the connection was successful, you should see a green leaf and a "connected" message appear around the _Local MongoDB Atlas_ connection. + +You will also see the default databases in the cluster appear under **Connections**. 
Any additional databases we create during the lab will also appear here. + + + +### Jupyter Notebook setup + +You will be filling code in a Jupyter Notebook during this lab, so let's get set up with that next! + +Within the codespace, click on the files icon in the left navigation bar of the IDE. In the Explorer menu, under `notebooks`, click on the file named `ai-rag-lab.ipynb` to open the Jupyter Notebook for this lab. + + + +That's it! You're ready for the lab! + + + + + +To run the lab locally, follow the steps below: + +* Clone the [GitHub repo](https://github.com/mongodb-developer/genai-devday-notebooks.git) for this lab by executing the following command from the terminal: + +``` +git clone https://github.com/mongodb-developer/genai-devday-notebooks.git +``` + +* `cd` into the `notebooks` directory of the cloned repository: + +``` +cd genai-devday-notebooks/notebooks +``` + +* Create and activate a Python virtual environment: + +``` +python -m venv ai-rag-lab +source ai-rag-lab/bin/activate +``` + +* Install the dependencies for this lab: + +``` +pip install -r requirements.txt +``` + +* Install and launch Jupyter Notebook: + +``` +pip install notebook +jupyter notebook +``` + +* In the browser tab that pops up, open the file named `ai-rag-lab.ipynb`. + + + + + + \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/2-setup-pre-reqs.mdx b/i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/2-setup-pre-reqs.mdx new file mode 100644 index 00000000..8c31ae29 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/2-setup-pre-reqs.mdx @@ -0,0 +1,26 @@ +# 👐 Setup prerequisites + +Run the cells under the **Step 1: Setup prerequisites** section in the notebook. 
+ +:::info + +Additional steps **if you are running the lab locally**: + +* Spin up a MongoDB Atlas cluster and obtain its connection string: + + * Register for a [free MongoDB Atlas account](https://www.mongodb.com/cloud/atlas/register) if you don't already have one + * [Create a new database cluster](https://www.mongodb.com/docs/guides/atlas/cluster) + * [Obtain the connection string](https://www.mongodb.com/docs/guides/atlas/connection-string) for your database cluster + +* Set the `MONGODB_URI` variable to the connection string for your cluster as follows: + +```python +MONGODB_URI = "" +``` + +* Manually set the value of the `SERVERLESS_URL` variable as follows: + +```python +SERVERLESS_URL = "https://vtqjvgchmwcjwsrela2oyhlegu0hwqnw.lambda-url.us-west-2.on.aws/" +``` +::: \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/_category_.json b/i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/_category_.json new file mode 100644 index 00000000..dd523b0e --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/20-dev-env/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Dev Environment", + "position": 20, + "link": { + "type": "generated-index", + "description": "Setup the dev environment and prerequisites" + } +} \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/1-load-data.mdx b/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/1-load-data.mdx new file mode 100644 index 00000000..bbba89b0 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/1-load-data.mdx @@ -0,0 +1,5 @@ +# 👐 Load the dataset + +First, let's download the dataset for the lab. We'll use a subset of MongoDB's technical documentation as the source data for the documentation chatbot. + +Run all the cells under the **Step 2: Load the dataset** section in the notebook to load the dataset from a local file. 
\ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/2-chunk-data.mdx b/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/2-chunk-data.mdx new file mode 100644 index 00000000..55f821c4 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/2-chunk-data.mdx @@ -0,0 +1,29 @@ +# 👐 Chunk up the data + +Since we are working with large documents, we first need to break them up into smaller chunks before embedding and storing them in MongoDB. + +Fill in any `<CODE_BLOCK_N>` placeholders and run the cells under the **Step 3: Chunk up the data** section in the notebook to chunk up the articles we loaded. + +The answers for code blocks in this section are as follows: + +**CODE_BLOCK_1** + +
+Answer +
+```python +text_splitter.split_text(text) +``` +
+
+ +**CODE_BLOCK_2** + +
+Answer +
+```python +get_chunks(doc, "body") +``` +
+
\ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/3-embed-data.mdx b/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/3-embed-data.mdx new file mode 100644 index 00000000..8b4c2315 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/3-embed-data.mdx @@ -0,0 +1,33 @@ +# 👐 Generate embeddings + +To perform vector search on the data, we need to embed it (i.e. generate embedding vectors) before ingesting it into MongoDB. + +Fill in any `<CODE_BLOCK_N>` placeholders and run the cells under the **Step 4: Generate embeddings** section in the notebook to embed the chunked articles. + +The answers for code blocks in this section are as follows: + +**CODE_BLOCK_3** + +
+Answer +
+```python +embedding_model.encode(text) +``` +
+
+ +**CODE_BLOCK_4** + +
+Answer +
+```python +doc["embedding"] = get_embedding(doc["body"]) +``` +
+
+ +:::caution +If the embedding generation is taking too long (> 5 min), kill/interrupt the cell and move on to the next step with the documents that have been embedded up until that point. +::: \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/4-ingest-data.mdx b/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/4-ingest-data.mdx new file mode 100644 index 00000000..7437ce3b --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/4-ingest-data.mdx @@ -0,0 +1,34 @@ +import Screenshot from "@site/src/components/Screenshot"; + +# 👐 Ingest data into MongoDB + +The final step to build a MongoDB vector store for the chatbot is to ingest the embedded article chunks into MongoDB. + +Fill in any `` placeholders and run the cells under the **Step 5: Ingest data into MongoDB** section in the notebook to ingest the embedded documents into MongoDB. + +The answers for code blocks in this section are as follows: + +**CODE_BLOCK_5** + +
+Answer +
+```python +collection.insert_many(embedded_docs) +``` +
+
+ +To visually verify that the data has been imported into the MongoDB cluster, click the leaf icon in the left navigation bar of the IDE. + +Ensure that you see a database called _mongodb_genai_devday_rag_, and a collection named _knowledge_base_ under it. + + + +Click the **>** arrow next to _knowledge_base_ and note the number of documents. + + + +To preview a document, click the **>** arrow next to _Documents_ and select a document from the list. + + \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/_category_.json b/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/_category_.json new file mode 100644 index 00000000..47a3660f --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/30-prepare-the-data/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Prepare the Data", + "position": 30, + "link": { + "type": "generated-index", + "description": "Download, chunk, embed, and ingest the data" + } +} \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/1-lecture-notes.mdx b/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/1-lecture-notes.mdx new file mode 100644 index 00000000..49cd329a --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/1-lecture-notes.mdx @@ -0,0 +1,5 @@ +# 📘 Lecture notes + +## Vector Search in MongoDB + +You can learn more about vector search in MongoDB [here](https://mongodb-developer.github.io/vector-search-lab/docs/key-concepts/vector-search#vector-search-in-mongodb). 
\ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/2-create-vector-index.mdx b/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/2-create-vector-index.mdx new file mode 100644 index 00000000..8063e792 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/2-create-vector-index.mdx @@ -0,0 +1,20 @@ +# 👐 Create a vector search index + +To retrieve documents from MongoDB using vector search, you must configure a vector search index on the collection into which you ingested your data. In this lab, you will programmatically create vector search indexes using MongoDB's Python driver. + +Fill in any `<CODE_BLOCK_N>` placeholders and run the cells under the **Step 6: Create a vector search index** section in the notebook to create a vector search index. + +The answers for code blocks in this section are as follows: + +**CODE_BLOCK_6** + +
+Answer +
+ +```python +create_index(collection, ATLAS_VECTOR_SEARCH_INDEX_NAME, model) +``` + +
+
\ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/3-vector-search.mdx b/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/3-vector-search.mdx new file mode 100644 index 00000000..b2a3a4b4 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/3-vector-search.mdx @@ -0,0 +1,57 @@ +# 👐 Perform vector search queries + +Now let's run some vector search queries against the data present in MongoDB. + +Fill in any `<CODE_BLOCK_N>` placeholders and run the cells under the **Step 7: Perform vector search on your data** section in the notebook to run vector search queries against your data. + +The answers for code blocks in this section are as follows: + +**CODE_BLOCK_7** + +
+Answer +
+```python +get_embedding(user_query) +``` +
+
+ +**CODE_BLOCK_8** + +
+Answer +
+```python +[ + { + "$vectorSearch": { + "index": ATLAS_VECTOR_SEARCH_INDEX_NAME, + "queryVector": query_embedding, + "path": "embedding", + "numCandidates": 150, + "limit": 5 + } + }, + { + "$project": { + "_id": 0, + "body": 1, + "score": {"$meta": "vectorSearchScore"} + } + } +] +``` +
+
+ +**CODE_BLOCK_9** + +
+Answer +
+```python +collection.aggregate(pipeline) +``` +
+
\ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/4-pre-filtering.mdx b/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/4-pre-filtering.mdx new file mode 100644 index 00000000..396e5e58 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/4-pre-filtering.mdx @@ -0,0 +1,126 @@ +# 🦹 Combine pre-filtering with vector search + +Pre-filtering is a technique to optimize vector search by only considering documents that match certain criteria during vector search. + +In this section, you will learn how to combine filters with vector search. This mainly involves: +* Updating the vector search index to include the appropriate filter fields +* Updating the `$vectorSearch` stage in the aggregation pipeline definition to include the filters + +Fill in any `<CODE_BLOCK_N>` placeholders and run the cells under the **🦹‍♀️ Combine pre-filtering with vector search** section in the notebook to experiment with combining pre-filters with your vector search queries. + +The answers for code blocks in this section are as follows: + +**CODE_BLOCK_10** + +
+Answer +
+```python +{ + "name": ATLAS_VECTOR_SEARCH_INDEX_NAME, + "type": "vectorSearch", + "definition": { + "fields": [ + { + "type": "vector", + "path": "embedding", + "numDimensions": 384, + "similarity": "cosine" + }, + {"type": "filter", "path": "metadata.productName"} + ] + } +} +``` +
+
+ +**CODE_BLOCK_11** + +
+Answer +
+```python +[ + { + "$vectorSearch": { + "index": ATLAS_VECTOR_SEARCH_INDEX_NAME, + "path": "embedding", + "queryVector": query_embedding, + "numCandidates": 150, + "limit": 5, + "filter": {"metadata.productName": "MongoDB Atlas"} + } + }, + { + "$project": { + "_id": 0, + "body": 1, + "score": {"$meta": "vectorSearchScore"} + } + } +] +``` +
+
+ +**CODE_BLOCK_12** + +
+Answer +
+```python +{ + "name": ATLAS_VECTOR_SEARCH_INDEX_NAME, + "type": "vectorSearch", + "definition": { + "fields": [ + { + "type": "vector", + "path": "embedding", + "numDimensions": 384, + "similarity": "cosine" + }, + {"type": "filter", "path": "metadata.contentType"}, + {"type": "filter", "path": "updated"} + ] + } +} +``` +
+
+ +**CODE_BLOCK_13** + +
+Answer +
+```python +[ + { + "$vectorSearch": { + "index": ATLAS_VECTOR_SEARCH_INDEX_NAME, + "path": "embedding", + "queryVector": query_embedding, + "numCandidates": 150, + "limit": 5, + "filter": { + "$and": [ + {"metadata.contentType": "Tutorial"}, + {"updated": {"$gte": "2024-05-19"}} + ] + } + } + }, + { + "$project": { + "_id": 0, + "body": 1, + "updated": 1, + "score": {"$meta": "vectorSearchScore"} + } + } +] +``` +
+
\ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/_category_.json b/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/_category_.json new file mode 100644 index 00000000..4ce44685 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/40-perform-vector-search/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Perform Vector Search on Your Data", + "position": 40, + "link": { + "type": "generated-index", + "description": "Perform vector search queries using MongoDB Atlas Vector Search" + } +} \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/1-build-rag-app.mdx b/i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/1-build-rag-app.mdx new file mode 100644 index 00000000..d69cfbba --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/1-build-rag-app.mdx @@ -0,0 +1,40 @@ +# 👐 Build the RAG application + +Let's create a simple RAG workflow that takes in a user query, retrieves contextually relevant documents from MongoDB Atlas, and passes the query and retrieved context to an LLM to generate an answer to the user question. + +Fill in any `<CODE_BLOCK_N>` placeholders and run the cells under the **Step 8: Build the RAG application** section in the notebook to build the RAG "application". + +The answers for code blocks in this section are as follows: + +**CODE_BLOCK_14** + +
+Answer +
+```python +vector_search(user_query) +``` +
+
+ +**CODE_BLOCK_15** + +
+Answer +
+```python +create_prompt(user_query) +``` +
+
+ +**CODE_BLOCK_16** + +
+Answer +
+```python +[{"role": "user", "content": prompt}] +``` +
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/2-add-reranking.mdx b/i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/2-add-reranking.mdx new file mode 100644 index 00000000..4922b10f --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/2-add-reranking.mdx @@ -0,0 +1,20 @@ +# 🦹 Re-rank retrieved results + +Re-rankers are specialized models that are trained to calculate the relevance between query-document pairs. Without re-ranking, the order of retrieved results is governed by the embedding model, which isn't optimized for relevance and can lead to poor LLM recall in RAG applications. + +Fill in any `<CODE_BLOCK_N>` placeholders and run the cells under the **🦹‍♀️ Re-rank retrieved results** section in the notebook to add a re-ranking stage to the chatbot. + +The answers for code blocks in this section are as follows: + +**CODE_BLOCK_17** + +
+Answer +
+```python +rerank_model.rank( + user_query, documents, return_documents=True, top_k=5 +) +``` +
+
\ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/_category_.json b/i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/_category_.json new file mode 100644 index 00000000..caadea1e --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/50-build-rag-app/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Build the RAG Application", + "position": 50, + "link": { + "type": "generated-index", + "description": "Build the RAG application" + } +} \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/60-add-memory/1-add-memory.mdx b/i18n/en/docusaurus-plugin-content-docs/current/60-add-memory/1-add-memory.mdx new file mode 100644 index 00000000..5901342d --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/60-add-memory/1-add-memory.mdx @@ -0,0 +1,74 @@ +# 👐 Add memory to the RAG application + +In many Q&A applications we want to allow the user to have a back-and-forth conversation with the LLM, meaning the application needs some sort of "memory" of past questions and answers, and some logic for incorporating those into its current thinking. In this section, you will retrieve chat message history from MongoDB and incorporate it in your RAG application. + +Fill in any `<CODE_BLOCK_N>` placeholders and run the cells under the **Step 9: Add memory to the RAG application** section in the notebook to add memory to the chatbot. + +The answers for code blocks in this section are as follows: + +**CODE_BLOCK_18** + +
+Answer +
+```python +history_collection.create_index("session_id") +``` +
+
+ +**CODE_BLOCK_19** + +
+Answer +
+```python +history_collection.insert_one(message) +``` +
+
+ +**CODE_BLOCK_20** + +
+Answer +
+```python +history_collection.find({"session_id": session_id}).sort("timestamp", 1) +``` +
+
+ +**CODE_BLOCK_21** + +
+Answer +
+```python +retrieve_session_history(session_id) +``` +
+
+ +**CODE_BLOCK_22** + +
+Answer +
+```python +{"role": "user", "content": user_query} +``` +
+
+ +**CODE_BLOCK_23** + +
+Answer +
+```python +store_chat_message(session_id, "user", user_query) +store_chat_message(session_id, "assistant", answer) +``` +
+
\ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/60-add-memory/_category_.json b/i18n/en/docusaurus-plugin-content-docs/current/60-add-memory/_category_.json new file mode 100644 index 00000000..62b680e9 --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/60-add-memory/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Add memory to the RAG application", + "position": 60, + "link": { + "type": "generated-index", + "description": "Incorporate chat history into the RAG application" + } +} \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/intro.mdx b/i18n/en/docusaurus-plugin-content-docs/current/intro.mdx new file mode 100644 index 00000000..006d68af --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/intro.mdx @@ -0,0 +1,26 @@ +--- +sidebar_position: 0 +--- + +# Introduction + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +|Lab goals|Learn how to build a documentation chatbot| +|:-|:-| +|What you'll learn|What is RAG | +||Components of a RAG application| +||Perform vector search queries using MongoDB Atlas Vector Search| +||Build a RAG-based documentation chatbot using MongoDB Atlas| +||Add memory to your chatbot| +|Time to complete|90 mins| + +In the navigation bar and in some pages, you will notice some icons. Here is their meaning: + +|Icon|Meaning| Description +|-|-|-| +|📘|Lecture material|If you are following along in an instructor-led session, they probably have covered this already.| +|👐|Hands-on content|Get ready to do some hands-on work. 
You should follow these steps.| +|📚|Documentation| Reference documentation for hands-on portions of the lab.| +|🦹|Advanced content|This content isn't covered during the lab, but if you're interested in learning more, you can check it out.| \ No newline at end of file diff --git a/i18n/en/docusaurus-plugin-content-docs/current/summary.mdx b/i18n/en/docusaurus-plugin-content-docs/current/summary.mdx new file mode 100644 index 00000000..c672869b --- /dev/null +++ b/i18n/en/docusaurus-plugin-content-docs/current/summary.mdx @@ -0,0 +1,17 @@ +--- +sidebar_position: 100 +--- + +# 🎯 Summary + +Congratulations! Following this lab, you have successfully: +* learned what Retrieval Augmented Generation (a.k.a. RAG) is +* learned when to use RAG +* learned how to perform vector search against data in MongoDB +* built a RAG application +* added memory to your RAG application + +Here are some resources that you might find helpful: +* [AI Learning Hub](https://www.mongodb.com/resources/use-cases/artificial-intelligence?utm_campaign=devrel&utm_source=devday&utm_medium=cta&utm_content=ai_rag_workshop&utm_term=apoorva_joshi) +* [GenAI Code Examples Repository](https://github.com/mongodb-developer/GenAI-Showcase) +* [GenAI Community Forums](https://www.mongodb.com/community/forums/c/generative-ai/162/?utm_campaign=devrel&utm_source=devday&utm_medium=cta&utm_content=ai_rag_workshop&utm_term=apoorva_joshi) \ No newline at end of file diff --git a/i18n/en/docusaurus-theme-classic/footer.json b/i18n/en/docusaurus-theme-classic/footer.json new file mode 100644 index 00000000..26d06e23 --- /dev/null +++ b/i18n/en/docusaurus-theme-classic/footer.json @@ -0,0 +1,26 @@ +{ + "link.item.label.Try MongoDB Atlas": { + "message": "Try MongoDB Atlas", + "description": "The label of footer link with label=Try MongoDB Atlas linking to https://www.mongodb.com/try?utm_campaign=devrel&utm_source=workshop&utm_medium=cta&utm_content=ai_rag_lab&utm_term=apoorva.joshi" + }, + "link.item.label.Developer 
Center": { + "message": "Developer Center", + "description": "The label of footer link with label=Developer Center linking to https://www.mongodb.com/developer?utm_campaign=devrel&utm_source=workshop&utm_medium=cta&utm_content=ai_rag_lab&utm_term=apoorva.joshi" + }, + "link.item.label.GenAI Code Examples": { + "message": "GenAI Code Examples", + "description": "The label of footer link with label=GenAI Code Examples linking to https://github.com/mongodb-developer/GenAI-Showcase" + }, + "link.item.label.GenAI Forums": { + "message": "GenAI Forums", + "description": "The label of footer link with label=GenAI Forums linking to https://www.mongodb.com/community/forums/c/generative-ai/162?utm_campaign=devrel&utm_source=workshop&utm_medium=cta&utm_content=ai_rag_lab&utm_term=apoorva.joshi" + }, + "link.item.label.This lab in GitHub": { + "message": "This lab in GitHub", + "description": "The label of footer link with label=This lab in GitHub linking to https://github.com/mongodb-developer/ai-rag-lab" + }, + "copyright": { + "message": "© 2025 MongoDB, Inc.", + "description": "The footer copyright" + } +} diff --git a/i18n/en/docusaurus-theme-classic/navbar.json b/i18n/en/docusaurus-theme-classic/navbar.json new file mode 100644 index 00000000..c7160431 --- /dev/null +++ b/i18n/en/docusaurus-theme-classic/navbar.json @@ -0,0 +1,10 @@ +{ + "title": { + "message": "Build RAG Applications using MongoDB", + "description": "The title in the navbar" + }, + "logo.alt": { + "message": "MongoDB Logo", + "description": "The alt text of navbar logo" + } +} diff --git a/i18n/pt-BR/code.json b/i18n/pt-BR/code.json new file mode 100644 index 00000000..d39c424c --- /dev/null +++ b/i18n/pt-BR/code.json @@ -0,0 +1,345 @@ +{ + "theme.ErrorPageContent.title": { + "message": "Esta página travou.", + "description": "The title of the fallback page when the page crashed" + }, + "theme.BackToTopButton.buttonAriaLabel": { + "message": "Rolar de volta ao topo", + "description": "The ARIA label for 
the back to top button" + }, + "theme.blog.archive.title": { + "message": "Arquivo", + "description": "The page & hero title of the blog archive page" + }, + "theme.blog.archive.description": { + "message": "Arquivo", + "description": "The page & hero description of the blog archive page" + }, + "theme.blog.paginator.navAriaLabel": { + "message": "Navegação da lista de blog", + "description": "The ARIA label for the blog pagination" + }, + "theme.blog.paginator.newerEntries": { + "message": "Entradas mais recentes", + "description": "The label used to navigate to the newer blog posts page (previous page)" + }, + "theme.blog.paginator.olderEntries": { + "message": "Entradas mais antigas", + "description": "The label used to navigate to the older blog posts page (next page)" + }, + "theme.tags.tagsPageLink": { + "message": "Ver todas as tags", + "description": "The label of the link targeting the tag list page" + }, + "theme.docs.breadcrumbs.navAriaLabel": { + "message": "Navegação", + "description": "The ARIA label for the breadcrumbs" + }, + "theme.colorToggle.ariaLabel": { + "message": "Alternar entre modo escuro e claro (atualmente {mode})", + "description": "The ARIA label for the navbar color mode toggle" + }, + "theme.colorToggle.ariaLabel.mode.dark": { + "message": "modo escuro", + "description": "The name for the dark color mode" + }, + "theme.colorToggle.ariaLabel.mode.light": { + "message": "modo claro", + "description": "The name for the light color mode" + }, + "theme.blog.post.paginator.navAriaLabel": { + "message": "Navegação da página do post do blog", + "description": "The ARIA label for the blog posts pagination" + }, + "theme.blog.post.paginator.newerPost": { + "message": "Post mais recente", + "description": "The blog post button label to navigate to the newer/previous post" + }, + "theme.blog.post.paginator.olderPost": { + "message": "Post mais antigo", + "description": "The blog post button label to navigate to the older/next post" + }, + 
"theme.docs.DocCard.categoryDescription.plurals": { + "message": "1 item|{count} itens", + "description": "The default description for a category card in the generated index about how many items this category includes" + }, + "theme.docs.paginator.navAriaLabel": { + "message": "Páginas de documentação", + "description": "The ARIA label for the docs pagination" + }, + "theme.docs.paginator.previous": { + "message": "Anterior", + "description": "The label used to navigate to the previous doc" + }, + "theme.docs.paginator.next": { + "message": "Próximo", + "description": "The label used to navigate to the next doc" + }, + "theme.docs.tagDocListPageTitle.nDocsTagged": { + "message": "Um documento marcado|{count} documentos marcados", + "description": "Pluralized label for \"{count} docs tagged\". Use as much plural forms (separated by \"|\") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)" + }, + "theme.docs.tagDocListPageTitle": { + "message": "{nDocsTagged} com \"{tagName}\"", + "description": "The title of the page for a docs tag" + }, + "theme.docs.versions.unreleasedVersionLabel": { + "message": "Esta é documentação não lançada para {siteTitle} versão {versionLabel}.", + "description": "The label used to tell the user that he's browsing an unreleased doc version" + }, + "theme.docs.versions.unmaintainedVersionLabel": { + "message": "Esta é documentação para {siteTitle} {versionLabel}, que não é mais mantida ativamente.", + "description": "The label used to tell the user that he's browsing an unmaintained doc version" + }, + "theme.docs.versions.latestVersionSuggestionLabel": { + "message": "Para documentação atualizada, veja a {latestVersionLink} ({versionLabel}).", + "description": "The label used to tell the user to check the latest version" + }, + "theme.docs.versions.latestVersionLinkLabel": { + "message": "versão mais recente", + "description": "The label used for the latest version 
suggestion link label" + }, + "theme.docs.versionBadge.label": { + "message": "Versão: {versionLabel}" + }, + "theme.common.editThisPage": { + "message": "Editar esta página", + "description": "The link label to edit the current page" + }, + "theme.lastUpdated.atDate": { + "message": " em {date}", + "description": "The words used to describe on which date a page has been last updated" + }, + "theme.lastUpdated.byUser": { + "message": " por {user}", + "description": "The words used to describe by who the page has been last updated" + }, + "theme.lastUpdated.lastUpdatedAtBy": { + "message": "Última atualização{atDate}{byUser}", + "description": "The sentence used to display when a page has been last updated, and by who" + }, + "theme.common.headingLinkTitle": { + "message": "Link direto para {heading}", + "description": "Title for link to heading" + }, + "theme.NotFound.title": { + "message": "Página Não Encontrada", + "description": "The title of the 404 page" + }, + "theme.navbar.mobileVersionsDropdown.label": { + "message": "Versões", + "description": "The label for the navbar versions dropdown on mobile view" + }, + "theme.AnnouncementBar.closeButtonAriaLabel": { + "message": "Fechar", + "description": "The ARIA label for close button of announcement bar" + }, + "theme.admonition.caution": { + "message": "cuidado", + "description": "The default label used for the Caution admonition (:::caution)" + }, + "theme.admonition.danger": { + "message": "perigo", + "description": "The default label used for the Danger admonition (:::danger)" + }, + "theme.admonition.info": { + "message": "informação", + "description": "The default label used for the Info admonition (:::info)" + }, + "theme.admonition.note": { + "message": "nota", + "description": "The default label used for the Note admonition (:::note)" + }, + "theme.admonition.tip": { + "message": "dica", + "description": "The default label used for the Tip admonition (:::tip)" + }, + "theme.admonition.warning": { + 
"message": "aviso", + "description": "The default label used for the Warning admonition (:::warning)" + }, + "theme.blog.sidebar.navAriaLabel": { + "message": "Navegação de posts recentes do blog", + "description": "The ARIA label for recent posts in the blog sidebar" + }, + "theme.CodeBlock.copied": { + "message": "Copiado", + "description": "The copied button label on code blocks" + }, + "theme.CodeBlock.copyButtonAriaLabel": { + "message": "Copiar código para a área de transferência", + "description": "The ARIA label for copy code blocks button" + }, + "theme.CodeBlock.copy": { + "message": "Copiar", + "description": "The copy button label on code blocks" + }, + "theme.CodeBlock.wordWrapToggle": { + "message": "Alternar quebra de linha", + "description": "The title attribute for toggle word wrapping button of code block lines" + }, + "theme.DocSidebarItem.expandCategoryAriaLabel": { + "message": "Expandir categoria da barra lateral '{label}'", + "description": "The ARIA label to expand the sidebar category" + }, + "theme.DocSidebarItem.collapseCategoryAriaLabel": { + "message": "Recolher categoria da barra lateral '{label}'", + "description": "The ARIA label to collapse the sidebar category" + }, + "theme.NotFound.p1": { + "message": "Não conseguimos encontrar o que você estava procurando.", + "description": "The first paragraph of the 404 page" + }, + "theme.NotFound.p2": { + "message": "Entre em contato com o proprietário do site que o linkou para a URL original e informe que o link está quebrado.", + "description": "The 2nd paragraph of the 404 page" + }, + "theme.NavBar.navAriaLabel": { + "message": "Principal", + "description": "The ARIA label for the main navigation" + }, + "theme.blog.post.readMore": { + "message": "Ler mais", + "description": "The label used in blog post item excerpts to link to full blog posts" + }, + "theme.blog.post.readMoreLabel": { + "message": "Ler mais sobre {title}", + "description": "The ARIA label for the link to full blog 
posts from excerpts" + }, + "theme.navbar.mobileLanguageDropdown.label": { + "message": "Idiomas", + "description": "The label for the mobile language switcher dropdown" + }, + "theme.TOCCollapsible.toggleButtonLabel": { + "message": "Nesta página", + "description": "The label used by the button on the collapsible TOC component" + }, + "theme.blog.post.readingTime.plurals": { + "message": "Um min de leitura|{readingTime} min de leitura", + "description": "Pluralized label for \"{readingTime} min read\". Use as much plural forms (separated by \"|\") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)" + }, + "theme.docs.breadcrumbs.home": { + "message": "Página inicial", + "description": "The ARIA label for the home page in the breadcrumbs" + }, + "theme.docs.sidebar.collapseButtonTitle": { + "message": "Recolher barra lateral", + "description": "The title attribute for collapse button of doc sidebar" + }, + "theme.docs.sidebar.collapseButtonAriaLabel": { + "message": "Recolher barra lateral", + "description": "The title attribute for collapse button of doc sidebar" + }, + "theme.docs.sidebar.navAriaLabel": { + "message": "Barra lateral da documentação", + "description": "The ARIA label for the sidebar navigation" + }, + "theme.docs.sidebar.closeSidebarButtonAriaLabel": { + "message": "Fechar barra de navegação", + "description": "The ARIA label for close button of mobile sidebar" + }, + "theme.docs.sidebar.expandButtonTitle": { + "message": "Expandir barra lateral", + "description": "The ARIA label and title attribute for expand button of doc sidebar" + }, + "theme.docs.sidebar.expandButtonAriaLabel": { + "message": "Expandir barra lateral", + "description": "The ARIA label and title attribute for expand button of doc sidebar" + }, + "theme.navbar.mobileSidebarSecondaryMenu.backButtonLabel": { + "message": "← Voltar ao menu principal", + "description": "The label of the back button to return to main 
menu, inside the mobile navbar sidebar secondary menu (notably used to display the docs sidebar)" + }, + "theme.docs.sidebar.toggleSidebarButtonAriaLabel": { + "message": "Alternar barra de navegação", + "description": "The ARIA label for hamburger menu button of mobile navigation" + }, + "theme.blog.post.plurals": { + "message": "Um post|{count} posts", + "description": "Pluralized label for \"{count} posts\". Use as much plural forms (separated by \"|\") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)" + }, + "theme.blog.tagTitle": { + "message": "{nPosts} marcados com \"{tagName}\"", + "description": "The title of the page for a blog tag" + }, + "theme.blog.author.pageTitle": { + "message": "{authorName} - {nPosts}", + "description": "The title of the page for a blog author" + }, + "theme.blog.authorsList.pageTitle": { + "message": "Autores", + "description": "The title of the authors page" + }, + "theme.blog.authorsList.viewAll": { + "message": "Ver todos os autores", + "description": "The label of the link targeting the blog authors page" + }, + "theme.contentVisibility.unlistedBanner.title": { + "message": "Página não listada", + "description": "The unlisted content banner title" + }, + "theme.contentVisibility.unlistedBanner.message": { + "message": "Esta página não está listada. Os motores de busca não a indexarão, e apenas usuários com um link direto podem acessá-la.", + "description": "The unlisted content banner message" + }, + "theme.contentVisibility.draftBanner.title": { + "message": "Página rascunho", + "description": "The draft content banner title" + }, + "theme.contentVisibility.draftBanner.message": { + "message": "Esta página é um rascunho. 
Ela só será visível em desenvolvimento e será excluída da build de produção.", + "description": "The draft content banner message" + }, + "theme.ErrorPageContent.tryAgain": { + "message": "Tentar novamente", + "description": "The label of the button to try again rendering when the React error boundary captures an error" + }, + "theme.common.skipToMainContent": { + "message": "Pular para o conteúdo principal", + "description": "The skip to content label used for accessibility, allowing to rapidly navigate to main content with keyboard tab/enter navigation" + }, + "theme.tags.tagsPageTitle": { + "message": "Tags", + "description": "The title of the tag list page" + }, + "homepage.title": { + "message": "Construa Aplicações RAG usando MongoDB", + "description": "The title of the homepage" + }, + "homepage.startButtonTitle": { + "message": "Iniciar Laboratório", + "description": "The title of the start button on the homepage" + }, + "homepage.feature.handsOn.title": { + "message": "Experiências Práticas", + "description": "The title of the hands-on experiences feature" + }, + "homepage.feature.handsOn.description": { + "message": "Aprenda fazendo, não apenas lendo.", + "description": "The description of the hands-on experiences feature" + }, + "homepage.feature.instructors.title": { + "message": "Instrutores Incríveis", + "description": "The title of the amazing instructors feature" + }, + "homepage.feature.instructors.description": { + "message": "Construa com a ajuda de nossos instrutores incríveis, ou faça por conta própria.", + "description": "The description of the amazing instructors feature" + }, + "homepage.feature.material.title": { + "message": "Material para Levar", + "description": "The title of the take-home material feature" + }, + "homepage.feature.material.description": { + "message": "Leve o material para casa e continue aprendendo.", + "description": "The description of the take-home material feature" + }, + "theme.docs.paginator.nextPage": { + 
"message": "Próxima Página", + "description": "The label for the next page in pagination" + }, + "theme.docs.paginator.previousPage": { + "message": "Página Anterior", + "description": "The label for the previous page in pagination" + } +} diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current.json b/i18n/pt-BR/docusaurus-plugin-content-docs/current.json new file mode 100644 index 00000000..0f6dfa6c --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current.json @@ -0,0 +1,54 @@ +{ + "version.label": { + "message": "Próximo", + "description": "The label for version current" + }, + "sidebar.tutorialSidebar.category.Key Concepts": { + "message": "Conceitos Fundamentais", + "description": "The label for category Key Concepts in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Key Concepts.link.generated-index.description": { + "message": "Aprenda os fundamentos da Geração Aumentada por Recuperação, também conhecida como RAG", + "description": "The generated-index page description for category Key Concepts in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Dev Environment": { + "message": "Ambiente de Desenvolvimento", + "description": "The label for category Dev Environment in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Dev Environment.link.generated-index.description": { + "message": "Configure o ambiente de desenvolvimento e pré-requisitos", + "description": "The generated-index page description for category Dev Environment in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Prepare the Data": { + "message": "Preparar os Dados", + "description": "The label for category Prepare the Data in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Prepare the Data.link.generated-index.description": { + "message": "Baixe, fragmente, embuta e ingira os dados", + "description": "The generated-index page description for category Prepare the Data in sidebar
tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Perform Vector Search on Your Data": { + "message": "Realizar Busca Vetorial nos Seus Dados", + "description": "The label for category Perform Vector Search on Your Data in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Perform Vector Search on Your Data.link.generated-index.description": { + "message": "Execute consultas de busca vetorial usando MongoDB Atlas Vector Search", + "description": "The generated-index page description for category Perform Vector Search on Your Data in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Build the RAG Application": { + "message": "Construir a Aplicação RAG", + "description": "The label for category Build the RAG Application in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Build the RAG Application.link.generated-index.description": { + "message": "Construa a aplicação RAG", + "description": "The generated-index page description for category Build the RAG Application in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Add memory to the RAG application": { + "message": "Adicionar memória à aplicação RAG", + "description": "The label for category Add memory to the RAG application in sidebar tutorialSidebar" + }, + "sidebar.tutorialSidebar.category.Add memory to the RAG application.link.generated-index.description": { + "message": "Incorpore o histórico de conversas na aplicação RAG", + "description": "The generated-index page description for category Add memory to the RAG application in sidebar tutorialSidebar" + } +} diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/1-what-is-rag.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/1-what-is-rag.mdx new file mode 100644 index 00000000..db435542 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/1-what-is-rag.mdx @@ -0,0 +1,5 @@ +# 📘 O que é RAG? 
+ +![](/img/screenshots/10-key-concepts/rag.png) + +RAG, abreviação de Retrieval Augmented Generation (Geração Aumentada por Recuperação), é uma técnica para melhorar a qualidade das respostas geradas por um modelo de linguagem grande (LLM), aumentando seu conhecimento pré-treinado com informações recuperadas de fontes externas. Isso resulta em respostas mais precisas do LLM, fundamentando-as em dados reais e contextualmente relevantes. \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/2-rag-usecases.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/2-rag-usecases.mdx new file mode 100644 index 00000000..59c043f7 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/2-rag-usecases.mdx @@ -0,0 +1,28 @@ +# 📘 Quando usar RAG? + +RAG é mais adequado para o seguinte: +* Tarefas que requerem informações muito específicas que você não acha que estarão presentes no conhecimento paramétrico dos LLMs, ou seja, informações que não estão amplamente disponíveis na internet +* Tarefas que requerem informações de múltiplas fontes de dados diferentes +* Tarefas que envolvem questionamento básico ou resumo de uma informação + +Não espere sucesso em tarefas complexas de múltiplas etapas envolvendo raciocínio dedutivo ou planejamento de longo prazo. Essas são mais adequadas para fluxos de trabalho baseados em agentes. + +Aqui estão alguns exemplos de tarefas/perguntas que **NÃO** requerem ou não podem ser alcançadas com RAG: + +> Quem foi o primeiro presidente dos Estados Unidos? + +A informação necessária para responder a esta pergunta muito provavelmente está presente no conhecimento paramétrico da maioria dos LLMs. Portanto, esta pergunta pode ser respondida usando um prompt simples para um LLM. + +> Como a tendência na ingestão média diária de calorias entre adultos mudou na última década nos Estados Unidos, e qual impacto isso pode ter nas taxas de obesidade? 
Além disso, você pode fornecer uma representação gráfica da tendência nas taxas de obesidade durante este período? + +Esta pergunta envolve múltiplas sub-tarefas como agregação de dados, visualização e raciocínio. Portanto, este é um bom caso de uso para um agente de IA em vez de RAG. + +Aqui estão alguns casos de uso para RAG: + +> Qual é a política de reembolso de viagem para refeições da minha empresa? + +A informação necessária para responder a esta pergunta muito provavelmente não está presente no conhecimento paramétrico dos LLMs disponíveis. No entanto, esta pergunta pode ser facilmente respondida usando RAG em uma base de conhecimento consistindo dos dados da sua empresa. + +> Olá, estou tendo problemas para instalar seu software no meu computador Windows 10. Ele continua me dando uma mensagem de erro dizendo 'Instalação falhou: Código de erro 1234'. Como posso resolver este problema? + +Novamente, esta pergunta requer informações de solução de problemas para um software específico, cuja documentação pode não estar amplamente disponível, mas pode ser resolvida usando RAG. \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/3-components-of-rag.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/3-components-of-rag.mdx new file mode 100644 index 00000000..2a10ee1b --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/3-components-of-rag.mdx @@ -0,0 +1,17 @@ +# 📘 Componentes de um sistema RAG + +Sistemas RAG têm dois componentes principais: **Recuperação** e **Geração**. + +## Recuperação + +A recuperação envolve principalmente processar seus dados e construir uma base de conhecimento de forma que você seja capaz de recuperar eficientemente informações relevantes dela. Tipicamente envolve três passos principais: + +* **Chunking**: Dividir grandes pedaços de informação em segmentos ou chunks menores. 
+ +* **Embedding**: Converter uma informação como texto, imagens, áudio, vídeo, etc. em um array de números, também conhecido como vetores. + +* **Busca Vetorial**: Recuperar os documentos mais relevantes da base de conhecimento baseado na similaridade de embeddings com o vetor da consulta. + +## Geração + +A geração envolve criar um prompt que contém todas as instruções e informações necessárias pelo LLM para gerar respostas precisas às consultas do usuário. \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/_category_.json b/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/_category_.json new file mode 100644 index 00000000..66ce418b --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/10-key-concepts/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Conceitos Fundamentais", + "position": 10, + "link": { + "type": "generated-index", + "description": "Aprenda os fundamentos da Geração Aumentada por Recuperação, também conhecida como RAG" + } +} \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/1-dev-env-setup.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/1-dev-env-setup.mdx new file mode 100644 index 00000000..597282c8 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/1-dev-env-setup.mdx @@ -0,0 +1,196 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import Screenshot from "@site/src/components/Screenshot"; + +# 👐 Configurar ambiente de desenvolvimento + +
+ 🦹 Se você está fazendo este laboratório como parte de um MongoDB GenAI Developer Day, você pode continuar no ambiente que criou anteriormente. + + + + + Navegue até o laboratório Instruqt usando [este](http://mdb.link/instruqt-ai) link. Clique em **Continue** para continuar no sandbox que você criou anteriormente. + + + + No menu Explorer, navegue até `genai-devday-notebooks` > `notebooks` > `ai-rag-lab.ipynb` Este é o Jupyter Notebook que você usará durante este laboratório. + + + + :::tip + Observe que este site de documentação também está linkado na aba `RAG Lab Instructions` do seu sandbox Instruqt. Sinta-se à vontade para acessar a documentação de lá para o resto do laboratório. + ::: + + + + + Navegue até [este](https://github.com/codespaces/new/mongodb-developer/genai-devday-notebooks?quickstart=1) link. Clique no botão **Resume this codespace** para retomar o codespace que você criou anteriormente. + + + + Dê ao codespace alguns segundos para reiniciar. Quando os arquivos aparecerem na aba Explorer, clique no arquivo chamado `ai-rag-lab.ipynb` sob `notebooks`. Este é o Jupyter Notebook que você usará durante este laboratório. + + + + + + + Abra a aba do navegador onde seu Jupyter Server de antes está rodando e abra o arquivo chamado `ai-rag-lab.ipynb`. + + + + + +
+ +:::tip +Neste laboratório, usaremos Jupyter Notebooks, que é um ambiente Python interativo. Se você é novo em Jupyter Notebooks, use [este](https://mongodb-developer.github.io/vector-search-lab/docs/dev-env/jupyter-notebooks) guia para se familiarizar com o ambiente. +::: + + + + +Instruqt é uma plataforma de laboratório que fornece sandboxes baseados em nuvem que vêm pré-configurados com todas as ferramentas que você precisa para executar este laboratório. + +**Navegue até o laboratório Instruqt usando [este](http://mdb.link/instruqt-ai) link.** Preencha o formulário que aparece e clique em **Submit and access**. + + + +Clique em **Start** para iniciar o ambiente do laboratório. + + + +Você deve ver uma tela com uma barra de progresso roxa indicando que o Instruqt está preparando um sandbox com todas as bibliotecas necessárias para este laboratório e um cluster MongoDB. + +Quando isso estiver feito, você deve ver um botão Start no canto inferior direito da tela. Clique nele para entrar no laboratório. + + + +### Conectar ao cluster MongoDB + +Vamos primeiro conectar ao cluster MongoDB que foi criado para você. Isso permitirá que você visualize dados que importamos para o cluster mais tarde no laboratório, diretamente do IDE VSCode. + +Para fazer isso, clique no ícone de folha na barra de navegação esquerda do IDE. Esta é a extensão VSCode do MongoDB. + + + +Sob **Connections**, clique na conexão _Local MongoDB Atlas_. Isso deve automaticamente estabelecer uma conexão com o cluster MongoDB local rodando na porta 27017. + + + +Se a conexão foi bem-sucedida, você deve ver uma folha verde e uma mensagem "connected" aparecer ao redor da conexão _Local MongoDB Atlas_. + +Você também verá os bancos de dados padrão no cluster aparecer sob **Connections**. Quaisquer bancos de dados adicionais que criarmos durante o laboratório também aparecerão aqui. 
+ + + +### Configuração do Jupyter Notebook + +Você estará preenchendo código em um Jupyter Notebook durante este laboratório, então vamos configurar isso a seguir! + +Dentro do sandbox, clique no ícone de arquivos na barra de navegação esquerda do IDE. No menu Explorer, navegue até `genai-devday-notebooks` > `notebooks` > `ai-rag-lab.ipynb` para abrir o Jupyter Notebook para este laboratório. + + + +Em seguida, selecione o interpretador Python clicando em **Select Kernel** no canto superior direito do IDE. + + + +No modal que aparece, clique em **Python environments...** e selecione o interpretador que está marcado como **Recommended** ou **Global Env**. + + + + + +É isso! Você está pronto para o laboratório! + +:::tip +Observe que este site de documentação também está linkado na aba `RAG Lab Instructions` do seu sandbox Instruqt. Sinta-se à vontade para acessar a documentação de lá para o resto do laboratório. +::: + + + + + +Um codespace é um ambiente de desenvolvimento hospedado em nuvem do GitHub, que vem pré-configurado com todas as ferramentas que você precisa para executar este laboratório. + +**Para criar um codespace, navegue até [este](https://github.com/codespaces/new/mongodb-developer/genai-devday-notebooks?quickstart=1) link.** Você será solicitado a fazer login no GitHub se ainda não estiver. Uma vez logado, clique no botão **Create new codespace** para criar um novo codespace. + + + +Deixe rodar por alguns segundos enquanto prepara um container Docker com todas as bibliotecas necessárias e um cluster MongoDB. + +### Conectar ao cluster MongoDB + +Vamos primeiro conectar ao cluster MongoDB que foi criado para você. Isso permitirá que você visualize dados que importamos para o cluster mais tarde no laboratório, diretamente do IDE VSCode. + +Para fazer isso, clique no ícone de folha na barra de navegação esquerda do IDE. Esta é a extensão VSCode do MongoDB. + + + +Sob **Connections**, clique na conexão _Local MongoDB Atlas_. 
Isso deve automaticamente estabelecer uma conexão com o cluster MongoDB local rodando na porta 27017. + + + +Se a conexão foi bem-sucedida, você deve ver uma folha verde e uma mensagem "connected" aparecer ao redor da conexão _Local MongoDB Atlas_. + +Você também verá os bancos de dados padrão no cluster aparecer sob **Connections**. Quaisquer bancos de dados adicionais que criarmos durante o laboratório também aparecerão aqui. + + + +### Configuração do Jupyter Notebook + +Você estará preenchendo código em um Jupyter Notebook durante este laboratório, então vamos configurar isso a seguir! + +Dentro do codespace, clique no ícone de arquivos na barra de navegação esquerda do IDE. No menu Explorer, sob `notebooks`, clique no arquivo chamado `ai-rag-lab.ipynb` para abrir o Jupyter Notebook para este laboratório. + + + +É isso! Você está pronto para o laboratório! + + + + + +Para executar o laboratório localmente, siga os passos abaixo: + +* Clone o [repositório GitHub](https://github.com/mongodb-developer/genai-devday-notebooks.git) para este laboratório executando o seguinte comando do terminal: + +``` +git clone https://github.com/mongodb-developer/genai-devday-notebooks.git +``` + +* `cd` para o diretório `notebooks` do repositório clonado: + +``` +cd genai-devday-notebooks/notebooks +``` + +* Crie e ative um ambiente virtual Python: + +``` +python -m venv ai-rag-lab +source ai-rag-lab/bin/activate +``` + +* Instale as dependências para este laboratório: + +``` +pip install -r requirements.txt +``` + +* Instale e lance o Jupyter Notebook: + +``` +pip install notebook +jupyter notebook +``` + +* Na aba do navegador que aparece, abra o arquivo chamado `ai-rag-lab.ipynb`. 
+ + + + + + \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/2-setup-pre-reqs.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/2-setup-pre-reqs.mdx new file mode 100644 index 00000000..e32bbd2b --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/2-setup-pre-reqs.mdx @@ -0,0 +1,26 @@ +# 👐 Configurar pré-requisitos + +Execute as células sob a seção **Step 1: Setup prerequisites** no notebook. + +:::info + +Passos adicionais **se você está executando o laboratório localmente**: + +* Inicie um cluster MongoDB Atlas e obtenha sua string de conexão: + + * Registre-se para uma [conta gratuita MongoDB Atlas](https://www.mongodb.com/cloud/atlas/register) se você ainda não tiver uma + * [Crie um novo cluster de banco de dados](https://www.mongodb.com/docs/guides/atlas/cluster) + * [Obtenha a string de conexão](https://www.mongodb.com/docs/guides/atlas/connection-string) para seu cluster de banco de dados + +* Defina a variável `MONGODB_URI` para a string de conexão do seu cluster da seguinte forma: + +```python +MONGODB_URI = "" +``` + +* Defina manualmente o valor da variável `SERVERLESS_URL` da seguinte forma: + +```python +SERVERLESS_URL = "https://vtqjvgchmwcjwsrela2oyhlegu0hwqnw.lambda-url.us-west-2.on.aws/" +``` +::: \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/_category_.json b/i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/_category_.json new file mode 100644 index 00000000..71883aac --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/20-dev-env/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Ambiente de Desenvolvimento", + "position": 20, + "link": { + "type": "generated-index", + "description": "Configure o ambiente de desenvolvimento e pré-requisitos" + } +} \ No newline at end of file diff --git 
a/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/1-load-data.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/1-load-data.mdx new file mode 100644 index 00000000..657a5887 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/1-load-data.mdx @@ -0,0 +1,5 @@ +# 👐 Carregar o conjunto de dados + +Primeiro, vamos baixar o conjunto de dados para o laboratório. Usaremos um subconjunto da documentação técnica do MongoDB como dados de origem para o chatbot de documentação. + +Execute todas as células sob a seção **Step 2: Load the dataset** no notebook para carregar o conjunto de dados de um arquivo local. \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/2-chunk-data.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/2-chunk-data.mdx new file mode 100644 index 00000000..a9dbe6ce --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/2-chunk-data.mdx @@ -0,0 +1,29 @@ +# 👐 Fragmentar os dados + +Como estamos trabalhando com documentos grandes, primeiro precisamos dividi-los em chunks menores antes de embuti-los e armazená-los no MongoDB. + +Preencha quaisquer espaços reservados `<CODE_BLOCK_N>` e execute as células sob a seção **Step 3: Chunk up the data** no notebook para fragmentar os artigos que carregamos. + +As respostas para os blocos de código nesta seção são as seguintes: + +**CODE_BLOCK_1** + +
+Resposta +
+```python +text_splitter.split_text(text) +``` +
+
+ +**CODE_BLOCK_2** + +
+Resposta +
+```python +get_chunks(doc, "body") +``` +
+
\ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/3-embed-data.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/3-embed-data.mdx new file mode 100644 index 00000000..7cd57c7a --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/3-embed-data.mdx @@ -0,0 +1,33 @@ +# 👐 Gerar Embeddings + +Para realizar busca vetorial nos dados, precisamos embuti-los (ou seja, gerar vetores de Embeddings) antes de ingeri-los no MongoDB. + +Preencha quaisquer espaços reservados `<CODE_BLOCK_N>` e execute as células sob a seção **Step 4: Generate embeddings** no notebook para embutir os artigos fragmentados. + +As respostas para os blocos de código nesta seção são as seguintes: + +**CODE_BLOCK_3** + +
+Resposta +
+```python +embedding_model.encode(text) +``` +
+
+ +**CODE_BLOCK_4** + +
+Resposta +
+```python +doc["embedding"] = get_embedding(doc["body"]) +``` +
+
+ +:::caution +Se a geração de Embeddings estiver demorando muito (> 5 min), interrompa a célula e continue para o próximo passo com os documentos que foram embutidos até aquele ponto. +::: \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/4-ingest-data.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/4-ingest-data.mdx new file mode 100644 index 00000000..fc520c8a --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/4-ingest-data.mdx @@ -0,0 +1,34 @@ +import Screenshot from "@site/src/components/Screenshot"; + +# 👐 Ingerir dados no MongoDB + +O passo final para construir um armazenamento vetorial MongoDB para o chatbot é ingerir os chunks de artigos embutidos no MongoDB. + +Preencha quaisquer espaços reservados `<CODE_BLOCK_N>` e execute as células sob a seção **Step 5: Ingest data into MongoDB** no notebook para ingerir os documentos embutidos no MongoDB. + +As respostas para os blocos de código nesta seção são as seguintes: + +**CODE_BLOCK_5** + +
+Resposta +
+```python +collection.insert_many(embedded_docs) +``` +
+
+ +Para verificar visualmente que os dados foram importados para o cluster MongoDB, clique no ícone de folha na barra de navegação esquerda do IDE. + +Certifique-se de que você vê um banco de dados chamado _mongodb_genai_devday_rag_, e uma coleção chamada _knowledge_base_ sob ele. + + + +Clique na seta **>** ao lado de _knowledge_base_ e observe o número de documentos. + + + +Para visualizar um documento, clique na seta **>** ao lado de _Documents_ e selecione um documento da lista. + + \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/_category_.json b/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/_category_.json new file mode 100644 index 00000000..277b7f32 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/30-prepare-the-data/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Preparar os Dados", + "position": 30, + "link": { + "type": "generated-index", + "description": "Baixe, fragmente, embuta e ingira os dados" + } +} \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/1-lecture-notes.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/1-lecture-notes.mdx new file mode 100644 index 00000000..82a9cf49 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/1-lecture-notes.mdx @@ -0,0 +1,5 @@ +# 📘 Notas da aula + +## Busca Vetorial no MongoDB + +Você pode aprender mais sobre busca vetorial no MongoDB [aqui](https://mongodb-developer.github.io/vector-search-lab/docs/key-concepts/vector-search#vector-search-in-mongodb). 
\ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/2-create-vector-index.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/2-create-vector-index.mdx new file mode 100644 index 00000000..6c4b3816 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/2-create-vector-index.mdx @@ -0,0 +1,20 @@ +# 👐 Criar um índice de busca vetorial + +Para recuperar documentos do MongoDB usando busca vetorial, você deve configurar um índice de busca vetorial na coleção na qual você ingeriu seus dados. Neste laboratório, você criará programaticamente índices de busca vetorial usando o driver Python do MongoDB. + +Preencha quaisquer espaços reservados `<CODE_BLOCK_N>` e execute as células sob a seção **Step 6: Create a vector search index** no notebook para criar um índice de busca vetorial. + +As respostas para os blocos de código nesta seção são as seguintes: + +**CODE_BLOCK_6** + +
+Resposta +
+ +```python +create_index(collection, ATLAS_VECTOR_SEARCH_INDEX_NAME, model) +``` + +
+
\ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/3-vector-search.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/3-vector-search.mdx new file mode 100644 index 00000000..70b779f7 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/3-vector-search.mdx @@ -0,0 +1,57 @@ +# 👐 Executar consultas de busca vetorial + +Agora vamos executar algumas consultas de busca vetorial contra os dados presentes no MongoDB. + +Preencha quaisquer espaços reservados `<CODE_BLOCK_N>` e execute as células sob a seção **Step 7: Perform vector search on your data** no notebook para executar consultas de busca vetorial contra seus dados. + +As respostas para os blocos de código nesta seção são as seguintes: + +**CODE_BLOCK_7** + +
+Resposta +
+```python +get_embedding(user_query) +``` +
+
+ +**CODE_BLOCK_8** + +
+Resposta +
+```json +[ + { + "$vectorSearch": { + "index": ATLAS_VECTOR_SEARCH_INDEX_NAME, + "queryVector": query_embedding, + "path": "embedding", + "numCandidates": 150, + "limit": 5 + } + }, + { + "$project": { + "_id": 0, + "body": 1, + "score": {"$meta": "vectorSearchScore"} + } + } +] +``` +
+
+ +**CODE_BLOCK_9** + +
+Resposta +
+```python +collection.aggregate(pipeline) +``` +
+
\ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/4-pre-filtering.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/4-pre-filtering.mdx new file mode 100644 index 00000000..e59381e2 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/4-pre-filtering.mdx @@ -0,0 +1,126 @@ +# 🦹 Combinar pré-filtragem com busca vetorial + +Pré-filtragem é uma técnica para otimizar a busca vetorial considerando apenas documentos que correspondem a certos critérios durante a busca vetorial. + +Nesta seção, você aprenderá como combinar filtros com busca vetorial. Isso envolve principalmente: +* Atualizar o índice de busca vetorial para incluir os campos de filtro apropriados +* Atualizar o estágio `$vectorSearch` na definição do pipeline de agregação para incluir os filtros + +Preencha quaisquer espaços reservados `<CODE_BLOCK_N>` e execute as células sob a seção **🦹‍♀️ Combine pre-filtering with vector search** no notebook para experimentar combinando pré-filtros com suas consultas de busca vetorial. + +As respostas para os blocos de código nesta seção são as seguintes: + +**CODE_BLOCK_10** + +
+Resposta +
+```json +{ + "name": ATLAS_VECTOR_SEARCH_INDEX_NAME, + "type": "vectorSearch", + "definition": { + "fields": [ + { + "type": "vector", + "path": "embedding", + "numDimensions": 384, + "similarity": "cosine" + }, + {"type": "filter", "path": "metadata.productName"} + ] + } +} +``` +
+
+ +**CODE_BLOCK_11** + +
+Resposta +
+```json +[ + { + "$vectorSearch": { + "index": ATLAS_VECTOR_SEARCH_INDEX_NAME, + "path": "embedding", + "queryVector": query_embedding, + "numCandidates": 150, + "limit": 5, + "filter": {"metadata.productName": "MongoDB Atlas"} + } + }, + { + "$project": { + "_id": 0, + "body": 1, + "score": {"$meta": "vectorSearchScore"} + } + } +] +``` +
+
+ +**CODE_BLOCK_12** + +
+Resposta +
+```json +{ + "name": ATLAS_VECTOR_SEARCH_INDEX_NAME, + "type": "vectorSearch", + "definition": { + "fields": [ + { + "type": "vector", + "path": "embedding", + "numDimensions": 384, + "similarity": "cosine" + }, + {"type": "filter", "path": "metadata.contentType"}, + {"type": "filter", "path": "updated"} + ] + } +} +``` +
+
+ +**CODE_BLOCK_13** + +
+Resposta +
+```json +[ + { + "$vectorSearch": { + "index": ATLAS_VECTOR_SEARCH_INDEX_NAME, + "path": "embedding", + "queryVector": query_embedding, + "numCandidates": 150, + "limit": 5, + "filter": { + "$and": [ + {"metadata.contentType": "Tutorial"}, + {"updated": {"$gte": "2024-05-19"}} + ] + } + } + }, + { + "$project": { + "_id": 0, + "body": 1, + "updated": 1, + "score": {"$meta": "vectorSearchScore"} + } + } +] +``` +
+
\ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/_category_.json b/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/_category_.json new file mode 100644 index 00000000..7b191bc8 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/40-perform-vector-search/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Realizar Busca Vetorial nos Seus Dados", + "position": 40, + "link": { + "type": "generated-index", + "description": "Execute consultas de busca vetorial usando MongoDB Atlas Vector Search" + } +} \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/1-build-rag-app.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/1-build-rag-app.mdx new file mode 100644 index 00000000..4c471779 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/1-build-rag-app.mdx @@ -0,0 +1,40 @@ +# 👐 Construir a aplicação RAG + +Vamos criar um fluxo de trabalho RAG simples que recebe uma consulta do usuário, recupera documentos contextualmente relevantes do MongoDB Atlas, e passa a consulta e o contexto recuperado para um LLM para gerar uma resposta à pergunta do usuário. + +Preencha quaisquer espaços reservados `<CODE_BLOCK_N>` e execute as células sob a seção **Step 8: Build the RAG application** no notebook para construir a "aplicação" RAG. + +As respostas para os blocos de código nesta seção são as seguintes: + +**CODE_BLOCK_14** + +
+Resposta +
+```python +vector_search(user_query) +``` +
+
+ +**CODE_BLOCK_15** + +
+Resposta +
+```python +create_prompt(user_query) +``` +
+
+ +**CODE_BLOCK_16** + +
+Resposta +
+```python +[{"role": "user", "content": prompt}] +``` +
+
diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/2-add-reranking.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/2-add-reranking.mdx new file mode 100644 index 00000000..30c7e128 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/2-add-reranking.mdx @@ -0,0 +1,20 @@ +# 🦹 Reordenar resultados recuperados + +Reordenadores são modelos especializados que são treinados para calcular a relevância entre pares de consulta-documento. Sem reordenação, a ordem dos resultados recuperados é governada pelo modelo de embeddings, que não é otimizado para relevância e pode levar a baixa recuperação do LLM em aplicações RAG. + +Preencha quaisquer espaços reservados `<CODE_BLOCK_N>` e execute as células sob a seção **🦹‍♀️ Re-rank retrieved results** no notebook para adicionar um estágio de reordenação ao chatbot. + +As respostas para os blocos de código nesta seção são as seguintes: + +**CODE_BLOCK_17** + +
+Resposta +
+```python +rerank_model.rank( + user_query, documents, return_documents=True, top_k=5 +) +``` +
+
\ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/_category_.json b/i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/_category_.json new file mode 100644 index 00000000..61db70e0 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/50-build-rag-app/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Construir a Aplicação RAG", + "position": 50, + "link": { + "type": "generated-index", + "description": "Construa a aplicação RAG" + } +} \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/60-add-memory/1-add-memory.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/60-add-memory/1-add-memory.mdx new file mode 100644 index 00000000..c32f3e5e --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/60-add-memory/1-add-memory.mdx @@ -0,0 +1,74 @@ +# 👐 Adicionar memória à aplicação RAG + +Em muitas aplicações de perguntas e respostas, queremos permitir que o usuário tenha uma conversa de ida e volta com o LLM, significando que a aplicação precisa de algum tipo de "memória" de perguntas e respostas passadas, e alguma lógica para incorporá-las em seu raciocínio atual. Nesta seção, você recuperará o histórico de mensagens de chat do MongoDB e o incorporará em sua aplicação RAG. + +Preencha quaisquer espaços reservados `<CODE_BLOCK_N>` e execute as células sob a seção **Step 9: Add memory to the RAG application** no notebook para adicionar memória ao chatbot. + +As respostas para os blocos de código nesta seção são as seguintes: + +**CODE_BLOCK_18** + +
+Resposta +
+```python +history_collection.create_index("session_id") +``` +
+
+ +**CODE_BLOCK_19** + +
+Resposta +
+```python +history_collection.insert_one(message) +``` +
+
+ +**CODE_BLOCK_20** + +
+Resposta +
+```python +history_collection.find({"session_id": session_id}).sort("timestamp", 1) +``` +
+
+ +**CODE_BLOCK_21** + +
+Resposta +
+```python +retrieve_session_history(session_id) +``` +
+
+ +**CODE_BLOCK_22** + +
+Resposta +
+```python +{"role": "user", "content": user_query} +``` +
+
+ +**CODE_BLOCK_23** + +
+Resposta +
+```python +store_chat_message(session_id, "user", user_query) +store_chat_message(session_id, "assistant", answer) +``` +
+
\ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/60-add-memory/_category_.json b/i18n/pt-BR/docusaurus-plugin-content-docs/current/60-add-memory/_category_.json new file mode 100644 index 00000000..cf0ddaf3 --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/60-add-memory/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Adicionar memória à aplicação RAG", + "position": 60, + "link": { + "type": "generated-index", + "description": "Incorpore o histórico de conversas na aplicação RAG" + } +} \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/intro.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/intro.mdx new file mode 100644 index 00000000..7f5ceebd --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/intro.mdx @@ -0,0 +1,26 @@ +--- +sidebar_position: 0 +--- + +# Introdução + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +|Objetivos do Laboratório|Aprenda como construir um chatbot de documentação| +|:-|:-| +|O que você aprenderá|O que é RAG | +||Componentes de uma aplicação RAG| +||Realizar consultas de busca vetorial usando MongoDB Atlas Vector Search| +||Construir um chatbot de documentação baseado em RAG usando MongoDB Atlas| +||Adicionar memória ao seu chatbot| +|Tempo para completar|90 minutos| + +Na barra de navegação e em algumas páginas, você notará alguns ícones. Aqui está o significado deles: + +|Ícone|Significado| Descrição +|-|-|-| +|📘|Material de aula|Se você está seguindo uma sessão conduzida por instrutor, eles provavelmente já cobriram isso.| +|👐|Conteúdo prático|Prepare-se para fazer algum trabalho prático. 
Você deve seguir estes passos.| +|📚|Documentação| Documentação de referência para as partes práticas do laboratório.| +|🦹|Conteúdo avançado|Este conteúdo não é coberto durante o laboratório, mas se você estiver interessado em aprender mais, pode conferir.| \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-plugin-content-docs/current/summary.mdx b/i18n/pt-BR/docusaurus-plugin-content-docs/current/summary.mdx new file mode 100644 index 00000000..44f383ac --- /dev/null +++ b/i18n/pt-BR/docusaurus-plugin-content-docs/current/summary.mdx @@ -0,0 +1,17 @@ +--- +sidebar_position: 100 +--- + +# 🎯 Resumo + +Parabéns! Seguindo este laboratório, você conseguiu com sucesso: +* aprender o que é Geração Aumentada por Recuperação, também conhecida como RAG +* aprender quando usar RAG +* aprender como realizar busca vetorial contra dados no MongoDB +* construir uma aplicação RAG +* adicionar memória à sua aplicação RAG + +Aqui estão alguns recursos que podem ser úteis: +* [Hub de Aprendizado de IA](https://www.mongodb.com/resources/use-cases/artificial-intelligence?utm_campaign=devrel&utm_source=devday&utm_medium=cta&utm_content=ai_rag_workshop&utm_term=apoorva_joshi) +* [Repositório de Exemplos de Código GenAI](https://github.com/mongodb-developer/GenAI-Showcase) +* [Fóruns da Comunidade GenAI](https://www.mongodb.com/community/forums/c/generative-ai/162/?utm_campaign=devrel&utm_source=devday&utm_medium=cta&utm_content=ai_rag_workshop&utm_term=apoorva_joshi) \ No newline at end of file diff --git a/i18n/pt-BR/docusaurus-theme-classic/footer.json b/i18n/pt-BR/docusaurus-theme-classic/footer.json new file mode 100644 index 00000000..9824d5c2 --- /dev/null +++ b/i18n/pt-BR/docusaurus-theme-classic/footer.json @@ -0,0 +1,26 @@ +{ + "link.item.label.Try MongoDB Atlas": { + "message": "Experimente MongoDB Atlas", + "description": "The label of footer link with label=Try MongoDB Atlas linking to 
https://www.mongodb.com/try?utm_campaign=devrel&utm_source=workshop&utm_medium=cta&utm_content=ai_rag_lab&utm_term=apoorva.joshi" + }, + "link.item.label.Developer Center": { + "message": "Centro de Desenvolvedores", + "description": "The label of footer link with label=Developer Center linking to https://www.mongodb.com/developer?utm_campaign=devrel&utm_source=workshop&utm_medium=cta&utm_content=ai_rag_lab&utm_term=apoorva.joshi" + }, + "link.item.label.GenAI Code Examples": { + "message": "Exemplos de Código GenAI", + "description": "The label of footer link with label=GenAI Code Examples linking to https://github.com/mongodb-developer/GenAI-Showcase" + }, + "link.item.label.GenAI Forums": { + "message": "Fóruns GenAI", + "description": "The label of footer link with label=GenAI Forums linking to https://www.mongodb.com/community/forums/c/generative-ai/162?utm_campaign=devrel&utm_source=workshop&utm_medium=cta&utm_content=ai_rag_lab&utm_term=apoorva.joshi" + }, + "link.item.label.This lab in GitHub": { + "message": "Este laboratório no GitHub", + "description": "The label of footer link with label=This lab in GitHub linking to https://github.com/mongodb-developer/ai-rag-lab" + }, + "copyright": { + "message": "© 2025 MongoDB, Inc.", + "description": "The footer copyright" + } +} diff --git a/i18n/pt-BR/docusaurus-theme-classic/navbar.json b/i18n/pt-BR/docusaurus-theme-classic/navbar.json new file mode 100644 index 00000000..34301381 --- /dev/null +++ b/i18n/pt-BR/docusaurus-theme-classic/navbar.json @@ -0,0 +1,10 @@ +{ + "title": { + "message": "Construa Aplicações RAG usando MongoDB", + "description": "The title in the navbar" + }, + "logo.alt": { + "message": "Logo MongoDB", + "description": "The alt text of navbar logo" + } +} diff --git a/src/components/HomepageFeatures/index.js b/src/components/HomepageFeatures/index.js index 684b127f..6abbbdd9 100644 --- a/src/components/HomepageFeatures/index.js +++ b/src/components/HomepageFeatures/index.js @@ -1,9 +1,10 
@@ import React from 'react'; import clsx from 'clsx'; +import Translate from '@docusaurus/Translate'; import styles from './styles.module.css'; import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; -function Feature({ illustration, title, description, fetchPriority }) { +function Feature({ illustration, title, description, fetchPriority, featureKey }) { return (
@@ -18,8 +19,16 @@ function Feature({ illustration, title, description, fetchPriority }) { />
-

{title}

-

{description}

+

+ + {title} + +

+

+ + {description} + +

); @@ -28,12 +37,19 @@ function Feature({ illustration, title, description, fetchPriority }) { export default function HomepageFeatures() { const { siteConfig } = useDocusaurusContext(); + const featureKeys = ['handsOn', 'instructors', 'material']; + return (
{siteConfig.customFields.featureList.map((props, idx) => ( - + ))}
diff --git a/src/pages/index.js b/src/pages/index.js index b3226cd0..e09cbd1a 100644 --- a/src/pages/index.js +++ b/src/pages/index.js @@ -2,6 +2,7 @@ import React from 'react'; import clsx from 'clsx'; import Link from '@docusaurus/Link'; import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; +import Translate from '@docusaurus/Translate'; import Layout from '@theme/Layout'; import HomepageFeatures from '@site/src/components/HomepageFeatures'; @@ -12,13 +13,19 @@ function HomepageHeader() { return (
-

{siteConfig.title}

+

+ + {siteConfig.title} + +

{siteConfig.tagline}

- {siteConfig.customFields.startButtonTitle} + + {siteConfig.customFields.startButtonTitle} +