Skip to content

[DO NOT REVIEW] In Progress: API Experiment 1 #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 13 commits into
base: form-recognizer-v2.0-GA
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions eng/Packages.Data.props
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
<PackageReference Update="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="2.6.1" />
<PackageReference Update="Microsoft.CodeAnalysis.FxCopAnalyzers" Version="2.6.2" />
<PackageReference Update="Microsoft.CodeAnalysis" Version="2.3.0" />
<PackageReference Update="Microsoft.CSharp" Version="4.7.0" />
<PackageReference Update="Microsoft.Extensions.Configuration.Abstractions" Version="2.1.0" />
<PackageReference Update="Microsoft.Extensions.Configuration.Binder" Version="2.1.0" />
<PackageReference Update="Microsoft.Extensions.Configuration.Json" Version="1.0.2" />
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<ItemGroup>
<PackageReference Include="Microsoft.CSharp" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\src\Azure.AI.FormRecognizer.csproj" />
Expand Down
116 changes: 116 additions & 0 deletions sdk/formrecognizer/samples/PredictionSample_CustomSupervised.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using Azure.AI.FormRecognizer.Prediction;
using System;
using System.IO;
using System.Text;
using System.Threading.Tasks;

namespace Azure.AI.FormRecognizer.Samples
{
public class PredictionSample_CustomSupervised
{
//public static async Task Main(string[] args)
//{
// try
// {
// await Analyze();
// }
// catch (Exception ex)
// {
// Console.WriteLine(ex);
// }
//}

/// <summary>
/// Submits a local PDF for custom supervised analysis, waits for the operation
/// to finish, and prints the recognized form fields followed by the OCR lines
/// and words of every extracted page.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when FORM_RECOGNIZER_ENDPOINT or FORM_RECOGNIZER_SUBSCRIPTION_KEY is not set.
/// </exception>
private static async Task Analyze()
{
    string endpoint = Environment.GetEnvironmentVariable("FORM_RECOGNIZER_ENDPOINT");
    string subscriptionKey = Environment.GetEnvironmentVariable("FORM_RECOGNIZER_SUBSCRIPTION_KEY");

    // Fail with an actionable message instead of the ArgumentNullException that
    // new Uri(null) would otherwise raise further down.
    if (string.IsNullOrEmpty(endpoint))
    {
        throw new InvalidOperationException("The FORM_RECOGNIZER_ENDPOINT environment variable is not set.");
    }

    if (string.IsNullOrEmpty(subscriptionKey))
    {
        throw new InvalidOperationException("The FORM_RECOGNIZER_SUBSCRIPTION_KEY environment variable is not set.");
    }

    var options = new FormRecognizerAnalysisClientOptions();
    var credential = new CognitiveKeyCredential(subscriptionKey);
    var client = new FormRecognizerAnalysisClient(new Uri(endpoint), credential, options);

    // NOTE(review): the model id and file path are specific to the author's
    // environment and must be replaced before running elsewhere.
    string modelId = "a36ff8a9-d7b3-4ee6-92d0-6e6eb73816c7";
    var filePath = @"C:\src\samples\cognitive\formrecognizer\sample_data\Test\Invoice_6.pdf";

    // Keep the file open for the lifetime of the operation and release the
    // handle deterministically afterwards, even if analysis throws.
    using (var stream = File.OpenRead(filePath))
    {
        var op = client.StartCustomSupervisedAnalysis(modelId, stream);
        Console.WriteLine($"Created request with id {op.Id}");
        Console.WriteLine("Waiting for completion...");

        // NOTE(review): the TimeSpan is presumably the polling interval - confirm.
        await op.WaitForCompletionAsync(TimeSpan.FromSeconds(1));

        if (!op.HasValue)
        {
            Console.WriteLine("error!");
            return;
        }

        CustomSupervisedAnalysisResult result = op.Value;

        // Print form fields
        foreach (var form in result.Forms)
        {
            Console.WriteLine($"In form found on pages {form.FormPageRange.Item1} - {form.FormPageRange.Item2}: ");

            foreach (var field in form.Fields)
            {
                // TODO: Would it be better to implement ToString here, instead of making users write out "Text"?
                // The unsupervised sample prints field.FieldName.Text / field.FieldValue.Text instead.
                Console.WriteLine($"Found field {field.Name} with value {field.Value}");
            }
        }

        // Print OCR Values
        foreach (var page in result.ExtractedPages)
        {
            Console.WriteLine($"On page {page.PageNumber}: ");

            foreach (var line in page.Lines)
            {
                Console.WriteLine($"Line text is {line.Text}, and composed of the words:");

                foreach (var word in line.Words)
                {
                    Console.WriteLine($"Word: {word.Text}, Confidence: {word.Confidence}");
                }
            }
        }

        // Uncomment to dump the raw service response while debugging:
        // PrintResponse(op.GetRawResponse());
    }
}

/// <summary>
/// Writes the body of <paramref name="response"/> to the console, decoded as UTF-8.
/// </summary>
/// <param name="response">The service response whose content stream is printed.</param>
private static void PrintResponse(Response response)
{
    Stream content = response.ContentStream;

    // A response without a body exposes no content stream; report that
    // rather than failing with a NullReferenceException.
    if (content == null)
    {
        Console.WriteLine("(response has no content)");
        return;
    }

    // Rewind so the whole body is printed, but only when the stream supports
    // seeking; setting Position on a forward-only stream throws.
    if (content.CanSeek)
    {
        content.Position = 0;
    }

    using (var buffer = new MemoryStream())
    {
        content.CopyTo(buffer);
        Console.WriteLine(Encoding.UTF8.GetString(buffer.ToArray()));
    }
}
}
}
111 changes: 111 additions & 0 deletions sdk/formrecognizer/samples/PredictionSample_CustomUnsupervised.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using Azure.AI.FormRecognizer.Prediction;
using System;
using System.IO;
using System.Text;
using System.Threading.Tasks;

namespace Azure.AI.FormRecognizer.Samples
{
public class PredictionSample_CustomUnsupervised
{
//public static async Task Main(string[] args)
//{
// try
// {
// await Analyze();
// }
// catch (Exception ex)
// {
// Console.WriteLine(ex);
// }
//}

/// <summary>
/// Submits a local PDF for custom unsupervised analysis, waits for the operation
/// to finish, and prints the key/value fields found on each page followed by
/// the OCR lines and words of every extracted page.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when FORM_RECOGNIZER_ENDPOINT or FORM_RECOGNIZER_SUBSCRIPTION_KEY is not set.
/// </exception>
private static async Task Analyze()
{
    string endpoint = Environment.GetEnvironmentVariable("FORM_RECOGNIZER_ENDPOINT");
    string subscriptionKey = Environment.GetEnvironmentVariable("FORM_RECOGNIZER_SUBSCRIPTION_KEY");

    // Fail with an actionable message instead of the ArgumentNullException that
    // new Uri(null) would otherwise raise further down.
    if (string.IsNullOrEmpty(endpoint))
    {
        throw new InvalidOperationException("The FORM_RECOGNIZER_ENDPOINT environment variable is not set.");
    }

    if (string.IsNullOrEmpty(subscriptionKey))
    {
        throw new InvalidOperationException("The FORM_RECOGNIZER_SUBSCRIPTION_KEY environment variable is not set.");
    }

    var options = new FormRecognizerAnalysisClientOptions();
    var credential = new CognitiveKeyCredential(subscriptionKey);
    var client = new FormRecognizerAnalysisClient(new Uri(endpoint), credential, options);

    // NOTE(review): the model id and file path are specific to the author's
    // environment and must be replaced before running elsewhere.
    string modelId = "a36ff8a9-d7b3-4ee6-92d0-6e6eb73816c7";
    var filePath = @"C:\src\samples\cognitive\formrecognizer\sample_data\Test\Invoice_6.pdf";

    // Keep the file open for the lifetime of the operation and release the
    // handle deterministically afterwards, even if analysis throws.
    using (var stream = File.OpenRead(filePath))
    {
        var op = client.StartCustomUnsupervisedAnalysis(modelId, stream);
        Console.WriteLine($"Created request with id {op.Id}");
        Console.WriteLine("Waiting for completion...");

        // NOTE(review): the TimeSpan is presumably the polling interval - confirm.
        await op.WaitForCompletionAsync(TimeSpan.FromSeconds(1));

        if (!op.HasValue)
        {
            Console.WriteLine("error!");
            return;
        }

        CustomUnsupervisedAnalysisResult result = op.Value;

        // Print form fields
        foreach (var page in result.PageValues)
        {
            Console.WriteLine($"On page {page.PageNumber}: ");

            foreach (var field in page.PageFields)
            {
                // TODO: Would it be better to implement ToString here, instead of making users write out "Text"?
                Console.WriteLine($"Found field {field.FieldName.Text} with value {field.FieldValue.Text}");
            }
        }

        // Print OCR Values
        foreach (var page in result.ExtractedPages)
        {
            Console.WriteLine($"On page {page.PageNumber}: ");

            foreach (var line in page.Lines)
            {
                Console.WriteLine($"Line text is {line.Text}, and composed of the words:");

                foreach (var word in line.Words)
                {
                    Console.WriteLine($"Word: {word.Text}, Confidence: {word.Confidence}");
                }
            }
        }

        // Uncomment to dump the raw service response while debugging:
        // PrintResponse(op.GetRawResponse());
    }
}

/// <summary>
/// Writes the body of <paramref name="response"/> to the console, decoded as UTF-8.
/// </summary>
/// <param name="response">The service response whose content stream is printed.</param>
private static void PrintResponse(Response response)
{
    Stream content = response.ContentStream;

    // A response without a body exposes no content stream; report that
    // rather than failing with a NullReferenceException.
    if (content == null)
    {
        Console.WriteLine("(response has no content)");
        return;
    }

    // Rewind so the whole body is printed, but only when the stream supports
    // seeking; setting Position on a forward-only stream throws.
    if (content.CanSeek)
    {
        content.Position = 0;
    }

    using (var buffer = new MemoryStream())
    {
        content.CopyTo(buffer);
        Console.WriteLine(Encoding.UTF8.GetString(buffer.ToArray()));
    }
}
}
}
110 changes: 110 additions & 0 deletions sdk/formrecognizer/samples/PredictionSample_Layout.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using Azure.AI.FormRecognizer.Prediction;
using System;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace Azure.AI.FormRecognizer.Samples
{
public class PredictionSample_Layout
{
/// <summary>
/// Sample entry point: runs the layout analysis and prints any exception
/// to the console instead of letting it escape the process.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static async Task Main(string[] args)
{
    try
    {
        Task analysis = Analyze();
        await analysis;
    }
    catch (Exception failure)
    {
        // Same text Console.WriteLine(object) would emit for the exception.
        Console.WriteLine(failure.ToString());
    }
}

/// <summary>
/// Submits a local PDF for form inset (layout) analysis, waits for the operation
/// to finish, and prints every extracted table cell followed by the OCR lines
/// and words of every extracted page.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when FORM_RECOGNIZER_ENDPOINT or FORM_RECOGNIZER_SUBSCRIPTION_KEY is not set.
/// </exception>
private static async Task Analyze()
{
    string endpoint = Environment.GetEnvironmentVariable("FORM_RECOGNIZER_ENDPOINT");
    string subscriptionKey = Environment.GetEnvironmentVariable("FORM_RECOGNIZER_SUBSCRIPTION_KEY");

    // Fail with an actionable message instead of the ArgumentNullException that
    // new Uri(null) would otherwise raise further down.
    if (string.IsNullOrEmpty(endpoint))
    {
        throw new InvalidOperationException("The FORM_RECOGNIZER_ENDPOINT environment variable is not set.");
    }

    if (string.IsNullOrEmpty(subscriptionKey))
    {
        throw new InvalidOperationException("The FORM_RECOGNIZER_SUBSCRIPTION_KEY environment variable is not set.");
    }

    var options = new FormRecognizerAnalysisClientOptions();
    var credential = new CognitiveKeyCredential(subscriptionKey);
    var client = new FormRecognizerAnalysisClient(new Uri(endpoint), credential, options);

    // NOTE(review): the file path is specific to the author's environment and
    // must be replaced before running elsewhere.
    var filePath = @"C:\src\samples\cognitive\formrecognizer\sample_data\Test\Receipt_6.pdf";

    // Keep the file open for the lifetime of the operation and release the
    // handle deterministically afterwards, even if analysis throws.
    using (var stream = File.OpenRead(filePath))
    {
        var op = client.StartFormInsetAnalysis(stream);
        Console.WriteLine($"Created request with id {op.Id}");
        Console.WriteLine("Waiting for completion...");

        // NOTE(review): the TimeSpan is presumably the polling interval - confirm.
        await op.WaitForCompletionAsync(TimeSpan.FromSeconds(1));

        if (!op.HasValue)
        {
            Console.WriteLine("error!");
            return;
        }

        FormInsetAnalysisResult result = op.Value;

        Console.WriteLine($"Form Inset Analysis found the following insets: ");

        foreach (var table in result.ExtractedTables)
        {
            Console.WriteLine($"Table on page {table.PageNumber} has {table.Rows} rows and {table.Columns} columns, and values:");

            foreach (var cell in table.Cells)
            {
                Console.WriteLine($" ({cell.ColumnIndex}, {cell.RowIndex}): {cell.Text}"); // TODO: note, cell value not typed.
            }
        }

        // Print OCR Values
        foreach (var page in result.ExtractedPages)
        {
            Console.WriteLine($"On page {page.PageNumber}: ");

            foreach (var line in page.Lines)
            {
                Console.WriteLine($"Line text is {line.Text}, and composed of the words:");

                foreach (var word in line.Words)
                {
                    Console.WriteLine($"Word: {word.Text}, Confidence: {word.Confidence}");
                }
            }
        }

        // Uncomment to dump the raw service response while debugging:
        // PrintResponse(op.GetRawResponse());
    }
}

/// <summary>
/// Writes the body of <paramref name="response"/> to the console, decoded as UTF-8.
/// </summary>
/// <param name="response">The service response whose content stream is printed.</param>
private static void PrintResponse(Response response)
{
    Stream content = response.ContentStream;

    // A response without a body exposes no content stream; report that
    // rather than failing with a NullReferenceException.
    if (content == null)
    {
        Console.WriteLine("(response has no content)");
        return;
    }

    // Rewind so the whole body is printed, but only when the stream supports
    // seeking; setting Position on a forward-only stream throws.
    if (content.CanSeek)
    {
        content.Position = 0;
    }

    using (var buffer = new MemoryStream())
    {
        content.CopyTo(buffer);
        Console.WriteLine(Encoding.UTF8.GetString(buffer.ToArray()));
    }
}
}
}
Loading