|
168 | 168 | }, |
169 | 169 | { |
170 | 170 | "cell_type": "code", |
171 | | - "execution_count": null, |
| 171 | + "execution_count": 6, |
172 | 172 | "metadata": {}, |
173 | 173 | "outputs": [ |
174 | 174 | { |
175 | | - "ename": "Exception", |
176 | | - "evalue": "Error 500: {\"detail\":\"Object of type ExtractKeyDescriptionPair is not JSON serializable\"}", |
177 | | - "output_type": "error", |
178 | | - "traceback": [ |
179 | | - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", |
180 | | - "\u001b[31mException\u001b[39m Traceback (most recent call last)", |
181 | | - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 11\u001b[39m\n\u001b[32m 3\u001b[39m extract_instruction = {\n\u001b[32m 4\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mdocument_owner\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33mname of the document owner\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 5\u001b[39m \u001b[33m\"\u001b[39m\u001b[33minvoice_num\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33minvoice number\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 6\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mpo_num\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33mpurchase order number\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 7\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mdate\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33mdate of the invoice\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 8\u001b[39m }\n\u001b[32m 10\u001b[39m \u001b[38;5;66;03m# extract returns a tuple containing the markdown as a string and total time\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m11\u001b[39m file_id = \u001b[43map\u001b[49m\u001b[43m.\u001b[49m\u001b[43masync_extract_key_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexample_local_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_type\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mpdf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextract_instruction\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextract_instruction\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 13\u001b[39m \u001b[38;5;66;03m# fetch results (5s polling up to 60s)\u001b[39;00m\n\u001b[32m 14\u001b[39m result = ap.async_fetch(file_id, sync_timeout=\u001b[32m60\u001b[39m, sync_interval=\u001b[32m5\u001b[39m)\n", |
182 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/any-parser/any_parser/any_parser.py:78\u001b[39m, in \u001b[36mhandle_file_processing.<locals>.wrapper\u001b[39m\u001b[34m(self, file_path, file_content, file_type, *args, **kwargs)\u001b[39m\n\u001b[32m 74\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 75\u001b[39m \u001b[38;5;66;03m# generate a random file path for genrating presigned url\u001b[39;00m\n\u001b[32m 76\u001b[39m file_path = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m/tmp/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00muuid.uuid4()\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m78\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 79\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 80\u001b[39m \u001b[43m \u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 81\u001b[39m \u001b[43m \u001b[49m\u001b[43mfile_content\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfile_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 82\u001b[39m \u001b[43m \u001b[49m\u001b[43mfile_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfile_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 83\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 84\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 85\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", |
183 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/any-parser/any_parser/any_parser.py:458\u001b[39m, in \u001b[36mAnyParser.async_extract_key_value\u001b[39m\u001b[34m(self, file_path, file_content, file_type, extract_instruction)\u001b[39m\n\u001b[32m 455\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 456\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mextract_instruction must be a dict or list\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m458\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_async_parser\u001b[49m\u001b[43m.\u001b[49m\u001b[43msend_async_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 459\u001b[39m \u001b[43m \u001b[49m\u001b[43mprocess_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mProcessType\u001b[49m\u001b[43m.\u001b[49m\u001b[43mEXTRACT_KEY_VALUE\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 460\u001b[39m \u001b[43m \u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore\u001b[39;49;00m\n\u001b[32m 461\u001b[39m \u001b[43m \u001b[49m\u001b[43mfile_content\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfile_content\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore\u001b[39;49;00m\n\u001b[32m 462\u001b[39m \u001b[43m \u001b[49m\u001b[43mfile_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfile_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore\u001b[39;49;00m\n\u001b[32m 463\u001b[39m \u001b[43m \u001b[49m\u001b[43mextract_args\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mextract_instruction\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mformatted_instruction\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 464\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", |
184 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/any-parser/any_parser/async_parser.py:84\u001b[39m, in \u001b[36mAsyncParser.send_async_request\u001b[39m\u001b[34m(self, process_type, file_path, file_content, file_type, extract_args)\u001b[39m\n\u001b[32m 76\u001b[39m response = requests.post(\n\u001b[32m 77\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m._base_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mendpoint\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m,\n\u001b[32m 78\u001b[39m headers=\u001b[38;5;28mself\u001b[39m._headers,\n\u001b[32m 79\u001b[39m data=json.dumps(payload),\n\u001b[32m 80\u001b[39m timeout=TIMEOUT,\n\u001b[32m 81\u001b[39m )\n\u001b[32m 83\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m response.status_code != \u001b[32m200\u001b[39m:\n\u001b[32m---> \u001b[39m\u001b[32m84\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mError \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse.status_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse.text\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 86\u001b[39m response_data = response.json()\n\u001b[32m 87\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response_data[\u001b[33m\"\u001b[39m\u001b[33mjob_id\u001b[39m\u001b[33m\"\u001b[39m]\n", |
185 | | - "\u001b[31mException\u001b[39m: Error 500: {\"detail\":\"Object of type ExtractKeyDescriptionPair is not JSON serializable\"}" |
| 175 | + "name": "stdout", |
| 176 | + "output_type": "stream", |
| 177 | + "text": [ |
| 178 | + "[{'key': 'document_owner', 'description': 'name of the document owner'}, {'key': 'invoice_num', 'description': 'invoice number'}, {'key': 'po_num', 'description': 'purchase order number'}, {'key': 'date', 'description': 'date of the invoice'}]\n" |
186 | 179 | ] |
| 180 | + }, |
| 181 | + { |
| 182 | + "name": "stdout", |
| 183 | + "output_type": "stream", |
| 184 | + "text": [ |
| 185 | + "Waiting for response...\n", |
| 186 | + "Waiting for response...\n", |
| 187 | + "Waiting for response...\n", |
| 188 | + "Waiting for response...\n" |
| 189 | + ] |
| 190 | + }, |
| 191 | + { |
| 192 | + "data": { |
| 193 | + "text/plain": [ |
| 194 | + "\"{'document_owner': ['Cambio Corp'], 'invoice_num': ['INV11111111'], 'po_num': ['PO22222222'], 'date': ['06-SEPT-2024']}\"" |
| 195 | + ] |
| 196 | + }, |
| 197 | + "metadata": {}, |
| 198 | + "output_type": "display_data" |
187 | 199 | } |
188 | 200 | ], |
189 | 201 | "source": [ |
|
199 | 211 | "# async_extract_key_value starts the extraction job and returns its id (not the extracted result)\n", |
200 | 212 | "file_id = ap.async_extract_key_value(file_path=example_local_file, file_type=\"pdf\", extract_instruction=extract_instruction)\n", |
201 | 213 | "\n", |
202 | | - "# fetch results (5s polling up to 60s)\n", |
203 | | - "result = ap.async_fetch(file_id, sync_timeout=60, sync_interval=5)\n", |
| 214 | + "# fetch results (3s polling up to 60s)\n", |
| 215 | + "result = ap.async_fetch(file_id, sync_timeout=60, sync_interval=3)\n", |
204 | 216 | "display(result)" |
205 | 217 | ] |
206 | 218 | }, |
|
0 commit comments