Skip to content

Commit 7115f03

Browse files
author
Ratin Kumar
committed
allow adding files using url
1 parent 10d26c4 commit 7115f03

File tree

3 files changed

+131
-17
lines changed

3 files changed

+131
-17
lines changed

backend/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
requests
12
alembic==1.2.1
23
Flask==1.1.1
34
Flask-Login==0.4.1

backend/routes/data.py

Lines changed: 98 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import sqlalchemy as sa
33
import uuid
4+
import requests
45

56
from pathlib import Path
67

@@ -25,8 +26,7 @@ def send_audio_file(file_name):
2526

2627

2728
def validate_segmentation(segment):
28-
"""Validate the segmentation before accepting the annotation's upload from users
29-
"""
29+
"""Validate the segmentation before accepting the annotation's upload from users"""
3030
required_key = {"start_time", "end_time", "transcription"}
3131

3232
if set(required_key).issubset(segment.keys()):
@@ -44,8 +44,7 @@ def generate_segmentation(
4444
data_id,
4545
segmentation_id=None,
4646
):
47-
"""Generate a Segmentation from the required segment information
48-
"""
47+
"""Generate a Segmentation from the required segment information"""
4948
if segmentation_id is None:
5049
segmentation = Segmentation(
5150
data_id=data_id,
@@ -190,3 +189,98 @@ def add_data():
190189
),
191190
201,
192191
)
192+
193+
194+
def download_file(url, save_path=None, timeout=30):
    """Stream the resource at ``url`` to a local file and return its path.

    Args:
        url: Address of the file to fetch.
        save_path: Destination path. When ``None`` the last segment of the
            URL path (query string and fragment excluded) is used, relative
            to the current working directory.
        timeout: Seconds to wait for the connection and for each read before
            giving up; forwarded unchanged to ``requests.get``.

    Returns:
        The path the file was written to (``save_path`` when it was given).

    Raises:
        ValueError: ``save_path`` is ``None`` and the URL has no file name.
        requests.RequestException: the request failed, timed out or the
            server answered with an error status.
        OSError: the destination could not be opened or written.
    """
    # Function-scope imports keep this block self-contained.
    import os
    from urllib.parse import urlparse

    if save_path is None:
        # Use only the path component so "?token=..." never ends up in the
        # file name.
        local_filename = urlparse(url).path.split("/")[-1]
        if not local_filename:
            raise ValueError(f"Cannot derive a file name from url: {url}")
    else:
        local_filename = save_path

    # Without a timeout a stalled remote would block the caller forever.
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(local_filename, "wb") as f:
            try:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
            except Exception:
                # Do not leave a truncated file behind when the transfer
                # breaks half way; close first so removal works everywhere.
                f.close()
                try:
                    os.remove(local_filename)
                except OSError:
                    pass
                raise
    return local_filename
202+
203+
204+
@api.route("/dataWithUrl", methods=["POST"])
def add_data_from_url():
    """Create a datapoint whose audio file is downloaded from a URL.

    The project is resolved from the API key in the ``Authorization`` header.
    Form fields read: ``username``, ``data_url``, ``reference_transcription``,
    ``is_marked_for_review`` and ``segmentations`` (a JSON-encoded list).

    Returns:
        A ``(json, 201)`` tuple carrying the id of the created datapoint.

    Raises:
        BadRequest: the API key or ``data_url`` is missing, the file
            extension is not allowed, ``segmentations`` is not valid JSON or
            lacks required keys, or the file could not be downloaded.
        NotFound: no project matches the API key or no user matches
            ``username``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlparse

    api_key = request.headers.get("Authorization", None)

    if not api_key:
        raise BadRequest(description="API Key missing from `Authorization` Header")

    project = Project.query.filter_by(api_key=api_key).first()

    if not project:
        raise NotFound(description="No project exist with given API Key")

    username = request.form.get("username", None)
    user = User.query.filter_by(username=username).first()

    if not user:
        raise NotFound(description="No user found with given username")

    reference_transcription = request.form.get("reference_transcription", None)
    data_url = request.form.get("data_url", None)
    # NOTE(review): form values are strings, so bool() is True for any
    # non-empty value, including "False". Kept as-is here; confirm the
    # intended parsing against the sibling upload endpoint before changing.
    is_marked_for_review = bool(request.form.get("is_marked_for_review", False))

    if not data_url:
        # A bare ``return 404`` is not a valid view response and a missing
        # field is a client error, so report it as such.
        raise BadRequest(description="`data_url` missing from request")

    # Validate the cheap inputs first so a rejected request never leaves a
    # downloaded file behind in the upload folder.
    try:
        segmentations = json.loads(request.form.get("segmentations", "[]"))
    except ValueError:
        raise BadRequest(description="Segmentations must be valid JSON")

    for segment in segmentations:
        if not validate_segmentation(segment):
            raise BadRequest(description="Segmentations have missing keys.")

    # Take the name from the URL path only, so a query string does not leak
    # into the file name or break the extension check.
    original_filename = secure_filename(urlparse(data_url).path.split("/")[-1])

    extension = Path(original_filename).suffix.lower()

    if len(extension) > 1 and extension[1:] not in ALLOWED_EXTENSIONS:
        raise BadRequest(description="File format is not supported")

    filename = f"{str(uuid.uuid4().hex)}{extension}"

    file_path = Path(app.config["UPLOAD_FOLDER"]).joinpath(filename)

    # NOTE(security): the server fetches an arbitrary client-supplied URL
    # (SSRF risk). Consider restricting allowed hosts or networks.
    try:
        download_file(data_url, file_path.as_posix())
    except requests.RequestException:
        raise BadRequest(description="Unable to download file from `data_url`")

    data = Data(
        project_id=project.id,
        filename=filename,
        original_filename=original_filename,
        reference_transcription=reference_transcription,
        is_marked_for_review=is_marked_for_review,
        assigned_user_id=user.id,
    )
    db.session.add(data)
    # Flush so ``data.id`` is populated for the segmentations below.
    db.session.flush()

    new_segmentations = [
        generate_segmentation(
            data_id=data.id,
            project_id=project.id,
            end_time=float(segment["end_time"]),
            start_time=float(segment["start_time"]),
            annotations=segment.get("annotations", {}),
            transcription=segment["transcription"],
        )
        for segment in segmentations
    ]

    data.set_segmentations(new_segmentations)

    db.session.commit()
    db.session.refresh(data)

    return (
        jsonify(
            data_id=data.id,
            message="Data uploaded, created and assigned successfully",
            type="DATA_CREATED",
        ),
        201,
    )

examples/upload_data/upload_data.py

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,27 +39,25 @@
3939
help="List of segmentations for the audio",
4040
default=[],
4141
)
42+
parser.add_argument(
43+
"--data_url",
44+
type=str,
45+
help="Url of ",
46+
default="",
47+
)
4248
parser.add_argument("--port", type=int, help="Port to make request to", default=80)
4349

4450
args = parser.parse_args()
4551

4652
api_key = os.getenv("API_KEY", None)
4753
headers = {"Authorization": api_key}
4854

49-
audio_path = Path(args.audio_file)
50-
audio_filename = audio_path.name
51-
if audio_path.is_file():
52-
audio_obj = open(audio_path.resolve(), "rb")
53-
else:
54-
print("Audio file does not exist")
55-
exit()
56-
55+
data_url = args.data_url
5756
reference_transcription = args.reference_transcription
5857
username = args.username
5958
is_marked_for_review = args.is_marked_for_review
6059
segmentations = args.segmentations
6160

62-
file = {"audio_file": (audio_filename, audio_obj)}
6361

6462
values = {
6563
"reference_transcription": reference_transcription,
@@ -68,10 +66,31 @@
6866
"is_marked_for_review": is_marked_for_review,
6967
}
7068

71-
print("Creating datapoint")
72-
response = requests.post(
73-
f"http://{args.host}:{args.port}/api/data", files=file, data=values, headers=headers
74-
)
69+
70+
print("Creating datapoint {}".format(f"from url: {data_url}" if data_url else ""))
71+
72+
if data_url:
73+
values.update({"data_url": data_url})
74+
response = requests.post(
75+
f"http://{args.host}:{args.port}/api/dataWithUrl", data=values, headers=headers
76+
)
77+
else:
78+
audio_path = Path(args.audio_file)
79+
audio_filename = audio_path.name
80+
if audio_path.is_file():
81+
audio_obj = open(audio_path.resolve(), "rb")
82+
else:
83+
print("Audio file does not exist")
84+
exit()
85+
file = {"audio_file": (audio_filename, audio_obj)}
86+
87+
response = requests.post(
88+
f"http://{args.host}:{args.port}/api/data",
89+
files=file,
90+
data=values,
91+
headers=headers,
92+
)
93+
7594

7695
if response.status_code == 201:
7796
response_json = response.json()

0 commit comments

Comments
 (0)