Omar ID EL MOUMEN committed on
Commit
e584cfd
·
1 Parent(s): ae9ea80

Remove old function

Browse files
Files changed (1) hide show
  1. app.py +69 -69
app.py CHANGED
@@ -366,7 +366,7 @@ async def main_menu():
366
  return FileResponse(os.path.join("templates", "index.html"))
367
 
368
  @app.post("/search-spec", response_model=KeywordResponse)
369
- def search_spec_v2(request: KeywordRequest):
370
  start_time = time.time()
371
  kws = [_.lower() for _ in request.keywords.split(" ")]
372
  results = []
@@ -398,80 +398,80 @@ def search_spec_v2(request: KeywordRequest):
398
  else:
399
  raise HTTPException(status_code=404, detail="Specifications not found")
400
 
401
- def search_spec(request: KeywordRequest):
402
- chars = "0123456789abcdefghijklmnopqrstuvwxyz"
403
- start_time = time.time()
404
- response = requests.get(f'https://www.3gpp.org/dynareport?code=status-report.htm', headers={"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}, verify=False)
405
- dfs = pd.read_html(StringIO(response.text), storage_options={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}, encoding="utf-8")
406
 
407
- for x in range(len(dfs)):
408
- dfs[x] = dfs[x].replace({np.nan: None})
409
 
410
- columns_needed = [0, 1, 2, 3, 4]
411
- extracted_dfs: List[pd.DataFrame] = [df.iloc[:, columns_needed] for df in dfs]
412
- columns = [x.replace("\xa0", "_") for x in extracted_dfs[0].columns]
413
 
414
- specifications = []
415
 
416
- for df in extracted_dfs:
417
- for index, row in df.iterrows():
418
- doc = row.to_list()
419
- doc_dict = dict(zip(columns, doc))
420
- specifications.append(doc_dict)
421
 
422
- kws = [_.lower() for _ in request.keywords.split(" ")]
423
- results = []
424
-
425
- for spec in specifications:
426
- if request.mode == "and":
427
- if not all(kw in spec["title"].lower() for kw in kws):
428
- continue
429
- elif request.mode == "or":
430
- if not any(kw in spec["title"].lower() for kw in kws):
431
- continue
432
- release = request.release
433
- working_group = request.wg
434
- spec_type = request.spec_type
435
-
436
- if spec.get('vers', None) is None or (release is not None and spec["vers"].split(".")[0] != str(release)):
437
- continue
438
- if spec.get('WG', None) is None or (working_group is not None and spec["WG"] != working_group):
439
- continue
440
- if spec_type is not None and spec["type"] != spec_type:
441
- continue
442
 
443
- doc_id = str(spec["spec_num"])
444
- series = doc_id.split(".")[0]
445
- a, b, c = str(spec["vers"]).split(".")
446
- print(spec["vers"])
447
- if not (int(a) > 35 or int(b) > 35 or int(c) > 35):
448
- spec_url = f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{doc_id}/{doc_id.replace('.', '')}-{chars[int(a)]}{chars[int(b)]}{chars[int(c)]}.zip"
449
- else:
450
- x,y,z = str(a), str(b), str(c)
451
- while len(x) < 2:
452
- x = "0" + x
453
- while len(y) < 2:
454
- y = "0" + y
455
- while len(z) < 2:
456
- z = "0" + z
457
- spec_url = f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{doc_id}/{doc_id.replace('.', '')}-{x}{y}{z}.zip"
458
- results.append({
459
- "id": str(spec["spec_num"]),
460
- "title": spec["title"],
461
- "type": "Technical Specification" if spec["type"] == "TS" else "Technical Report",
462
- "release": str(spec["vers"].split(".")[0]),
463
- "version": str(spec["vers"]),
464
- "working_group": spec["WG"],
465
- "url": spec_url
466
- })
467
-
468
- if len(results) > 0:
469
- return KeywordResponse(
470
- results=results,
471
- search_time=time.time() - start_time
472
- )
473
- else:
474
- raise HTTPException(status_code=404, detail="Specification not found")
475
 
476
  @app.post("/find", response_model=DocResponse)
477
  def find_document(request: DocRequest):
 
366
  return FileResponse(os.path.join("templates", "index.html"))
367
 
368
  @app.post("/search-spec", response_model=KeywordResponse)
369
+ def search_spec(request: KeywordRequest):
370
  start_time = time.time()
371
  kws = [_.lower() for _ in request.keywords.split(" ")]
372
  results = []
 
398
  else:
399
  raise HTTPException(status_code=404, detail="Specifications not found")
400
 
401
+ # def search_spec(request: KeywordRequest):
402
+ # chars = "0123456789abcdefghijklmnopqrstuvwxyz"
403
+ # start_time = time.time()
404
+ # response = requests.get(f'https://www.3gpp.org/dynareport?code=status-report.htm', headers={"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}, verify=False)
405
+ # dfs = pd.read_html(StringIO(response.text), storage_options={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}, encoding="utf-8")
406
 
407
+ # for x in range(len(dfs)):
408
+ # dfs[x] = dfs[x].replace({np.nan: None})
409
 
410
+ # columns_needed = [0, 1, 2, 3, 4]
411
+ # extracted_dfs: List[pd.DataFrame] = [df.iloc[:, columns_needed] for df in dfs]
412
+ # columns = [x.replace("\xa0", "_") for x in extracted_dfs[0].columns]
413
 
414
+ # specifications = []
415
 
416
+ # for df in extracted_dfs:
417
+ # for index, row in df.iterrows():
418
+ # doc = row.to_list()
419
+ # doc_dict = dict(zip(columns, doc))
420
+ # specifications.append(doc_dict)
421
 
422
+ # kws = [_.lower() for _ in request.keywords.split(" ")]
423
+ # results = []
424
+
425
+ # for spec in specifications:
426
+ # if request.mode == "and":
427
+ # if not all(kw in spec["title"].lower() for kw in kws):
428
+ # continue
429
+ # elif request.mode == "or":
430
+ # if not any(kw in spec["title"].lower() for kw in kws):
431
+ # continue
432
+ # release = request.release
433
+ # working_group = request.wg
434
+ # spec_type = request.spec_type
435
+
436
+ # if spec.get('vers', None) is None or (release is not None and spec["vers"].split(".")[0] != str(release)):
437
+ # continue
438
+ # if spec.get('WG', None) is None or (working_group is not None and spec["WG"] != working_group):
439
+ # continue
440
+ # if spec_type is not None and spec["type"] != spec_type:
441
+ # continue
442
 
443
+ # doc_id = str(spec["spec_num"])
444
+ # series = doc_id.split(".")[0]
445
+ # a, b, c = str(spec["vers"]).split(".")
446
+ # print(spec["vers"])
447
+ # if not (int(a) > 35 or int(b) > 35 or int(c) > 35):
448
+ # spec_url = f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{doc_id}/{doc_id.replace('.', '')}-{chars[int(a)]}{chars[int(b)]}{chars[int(c)]}.zip"
449
+ # else:
450
+ # x,y,z = str(a), str(b), str(c)
451
+ # while len(x) < 2:
452
+ # x = "0" + x
453
+ # while len(y) < 2:
454
+ # y = "0" + y
455
+ # while len(z) < 2:
456
+ # z = "0" + z
457
+ # spec_url = f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{doc_id}/{doc_id.replace('.', '')}-{x}{y}{z}.zip"
458
+ # results.append({
459
+ # "id": str(spec["spec_num"]),
460
+ # "title": spec["title"],
461
+ # "type": "Technical Specification" if spec["type"] == "TS" else "Technical Report",
462
+ # "release": str(spec["vers"].split(".")[0]),
463
+ # "version": str(spec["vers"]),
464
+ # "working_group": spec["WG"],
465
+ # "url": spec_url
466
+ # })
467
+
468
+ # if len(results) > 0:
469
+ # return KeywordResponse(
470
+ # results=results,
471
+ # search_time=time.time() - start_time
472
+ # )
473
+ # else:
474
+ # raise HTTPException(status_code=404, detail="Specification not found")
475
 
476
  @app.post("/find", response_model=DocResponse)
477
  def find_document(request: DocRequest):