File size: 35,517 Bytes
5ff8f40
5b8d529
5ff8f40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b8d529
5ff8f40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b8d529
5ff8f40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac764a6
5ff8f40
 
 
 
 
 
 
5b8d529
5ff8f40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6aed8e6
ac764a6
6aed8e6
 
 
 
 
 
c3fe34b
6aed8e6
 
 
 
 
 
ac764a6
 
6aed8e6
 
 
 
 
 
 
 
 
5ff8f40
c3fe34b
5ff8f40
 
 
 
 
 
 
 
 
 
 
c3fe34b
5ff8f40
 
 
 
 
 
 
 
 
 
 
 
ac764a6
5ff8f40
 
 
ac764a6
c3fe34b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac764a6
 
5ff8f40
 
 
 
 
c3fe34b
5ff8f40
 
 
ac764a6
5ff8f40
 
 
5b8d529
5ff8f40
 
 
 
 
 
 
 
 
 
c3fe34b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ff8f40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6aed8e6
 
5ff8f40
 
 
 
 
 
 
 
 
c3fe34b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b8d529
 
 
 
 
 
 
 
 
 
 
 
 
 
6aed8e6
5b8d529
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3fe34b
 
 
6aed8e6
c3fe34b
 
 
 
 
 
4474bd3
6aed8e6
4474bd3
6aed8e6
 
 
 
 
 
ac764a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6aed8e6
ac764a6
 
 
 
 
 
 
 
6aed8e6
ac764a6
 
 
 
 
4474bd3
6aed8e6
ac764a6
 
 
 
 
 
c3fe34b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ff8f40
c3fe34b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ff8f40
 
6aed8e6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
import streamlit as st
import requests
import os
import json
from pypdf import PdfReader
from groq import Groq
from dotenv import load_dotenv
import time
import ast

# Load environment variables
load_dotenv()

# Function to extract text from the uploaded PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of *pdf_file*, concatenated in page order.

    Any exception raised while opening or reading the PDF is reported through
    ``st.error`` rather than propagated. Text gathered before the failure
    (possibly the empty string) is still returned.
    """
    collected = ""
    try:
        for pdf_page in PdfReader(pdf_file).pages:
            collected = collected + pdf_page.extract_text()
    except Exception as read_error:
        st.error(f"An error occurred while reading the PDF: {read_error}")
    return collected

# Function to classify the extracted text using the LLM
def classification_LLM(text):
    """Ask the Groq chat model which inspection categories apply to *text*.

    The system prompt lists the allowed choices; *text* is sent unchanged as
    the user message. The reply is streamed and returned as one string exactly
    as the model produced it (no parsing is done here).
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    system_prompt = "You are a helpful classification assistant. You understand engineering concepts. You will be given some text which mostly describes a problem. You have to classify the problem according to a list of choices. More than one choice can also be applicable. Return as a array of applicable CHOICES only. Only return the choices that you are very sure about\n\n#CHOICES\n\n2D Measurement: Diameter, thickness, etc.\n\nAnomaly Detection: Scratches, dents, corrosion\n\nPrint Defect: Smudging, misalignment\n\nCounting: Individual components, features\n\n3D Measurement: Volume, surface area\n\nPresence/Absence: Missing components, color deviations\n\nOCR: Optical Character Recognition, Font types and sizes to be recognized, Reading speed and accuracy requirements\n\nCode Reading: Types of codes to read (QR, Barcode)\n\nMismatch Detection: Specific features to compare for mismatches, Component shapes, color mismatches\n\nClassification: Categories of classes to be identified, Features defining each class\n\nAssembly Verification: Checklist of components or features to verify, Sequence of assembly to be followed\n\nColor Verification: Color standards or samples to match\n"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=conversation,
        temperature=0.21,
        max_tokens=2048,
        top_p=1,
        stream=True,
        stop=None,
    )

    # Each streamed chunk carries a text delta; a missing delta counts as "".
    return "".join(piece.choices[0].delta.content or "" for piece in stream)

def obsjsoncreate(json_template,text,ogtext):
    """Build the observation JSON in two model passes and return the reply text.

    Pass 1 sends *json_template* (stringified) together with *text* and asks
    the model to keep only the fields mentioned in *text*.
    Pass 2 sends that trimmed JSON together with *ogtext* and asks the model
    to fill in the "User Answer" entries, leaving unknown ones as TBD.

    Returns the raw streamed reply of the second pass as a string.
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    # --- pass 1: trim the template down to the relevant fields ---------------
    trim_prompt = "You are a helpful assistant. You will be given a text snippet. You will also be given a JSON where some of the fields match with the bullet points in the text. I want you return a JSON where only the fields and subproperties mentioned in the text are present. DONT OUTPUT ANYTHING OTHER THAN THE JSON\n"
    trim_request = "JSON:"+str(json_template)+"\nText:"+text
    trim_stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {"role": "system", "content": trim_prompt},
            {"role": "user", "content": trim_request},
        ],
        temperature=0.21,
        max_tokens=8000,
        top_p=1,
        stream=True,
        stop=None,
    )
    trimmed_json = "".join(piece.choices[0].delta.content or "" for piece in trim_stream)

    # --- pass 2: fill the "User Answer" entries from the original text -------
    fill_prompt = "You are a helpful classification assistant. You understand engineering concepts. You will be given a JSON where there are properties and their descriptions. You need to fill up the JSON subproperty \"USer Answer\" from the details given in the text. If you are not sure of any field, leave the \"User Answer\" as TBD. Give the JSON output with the filled fields only. ENSURE THE JSON IS VALID AND PROPERLY FORMATTED. DO NOT OUTPUT ANYTHING OTHER THAN THE JSON."
    fill_request = "JSON: "+trimmed_json+"\n Text: "+ogtext
    fill_stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {"role": "system", "content": fill_prompt},
            {"role": "user", "content": fill_request},
        ],
        temperature=0.21,
        max_tokens=8000,
        top_p=1,
        stream=True,
        stop=None,
    )
    return "".join(piece.choices[0].delta.content or "" for piece in fill_stream)

def bizobjjsoncreate(json_template,text):
    """Ask the Groq chat model to fill the "User Answer" entries of a template.

    *json_template* is stringified and sent together with *text*; the prompt
    tells the model to keep every field and to leave unknown answers as TBD.
    Returns the streamed reply as a single string (not parsed).
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    system_prompt = "You are a helpful classification assistant. You understand engineering concepts. You will be given a JSON where there are properties and their descriptions. You need to fill up the JSON subproperty \"USer Answer\" from the details given in the text. If you are not sure of any field, leave the \"User Answer\" as TBD. Make sure you dont leave out a single field of the JSON. ENSURE THE JSON IS VALID AND PROPERLY FORMATTED. DO NOT OUTPUT ANYTHING OTHER THAN THE JSON."
    user_message = "JSON: "+str(json_template)+"\n Text: "+text

    stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        temperature=0.21,
        max_tokens=8000,
        top_p=1,
        stream=True,
        stop=None,
    )
    return "".join(piece.choices[0].delta.content or "" for piece in stream)

def question_create(json_template):
    """Generate follow-up questions for every "User Answer" still marked TBD.

    Two model requests are made with the same Groq client:

    1. *json_template* (stringified) is sent with a prompt asking for one
       question per TBD answer, returned as an array.
    2. That draft is sent to a second prompt that drops image-upload
       questions, merges overlapping ones and caps the list at 15.

    Args:
        json_template: The (partially filled) JSON; converted with ``str()``.

    Returns:
        The streamed reply of the second request as one string. The text is
        whatever the model produced; it is not parsed or validated here.
    """
    client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    draft_stream = client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant. You will be given a JSON where some subproperties labelled \"User Answer\" are marked as \"TBD\". I want you to create questions that you as an assistant would ask the user in order to fill up the User Answer field. Return all the questions for the user in an array. DONT OUTPUT ANYTHING OTHER THAN THE QUESTION ARRAY."
            },
            {
                "role": "user",
                "content": str(json_template)
            }
        ],
        temperature=0.21,
        max_tokens=2048,
        top_p=1,
        stream=True,
        stop=None,
    )
    draft_questions = "".join(
        chunk.choices[0].delta.content or "" for chunk in draft_stream
    )

    # Reuse the client created above instead of building a second, implicitly
    # configured Groq() instance: every request in this module is made with the
    # explicitly supplied GROQ_API_KEY.
    polished_stream = client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {
                "role": "system",
                "content": "You are an experienced writer. You will be given an array of questions. \nSome questions will ask to upload images. Ignore any of these type of questions.\nSome questions ask about different identities or descriptions of the same thing. I want you to merge the questions so as to ask input from them once.\nConvert all questions so that more of a professional is maintained. AVOID REDUNDANCY. DONT RETURN MORE THAN 15 QUESTIONS.\nRETURN AN ARRAY OF THE QUESTIONS ONLY. DO NOT RETURN ANYTHING ELSE. "
            },
            {
                "role": "user",
                "content": draft_questions
            }
        ],
        temperature=0.45,
        max_tokens=4240,
        top_p=1,
        stream=True,
        stop=None,
    )
    return "".join(
        chunk.choices[0].delta.content or "" for chunk in polished_stream
    )

def qapair_create(script):
    """Ask the Groq chat model to pair up the questions and answers in *script*.

    *script* is stringified and sent as the user message; the system prompt
    shows the expected "Question:... Answer:..." array format. Returns the
    streamed reply as one string (not parsed).
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    system_prompt = "You are a helpful assistant. You will be given two arrays, questions and answer. I want you to create a question answer pair. For example, \n#INPUT\nQuestion=['What is my name?', 'What is your age?']\nAnswer=['Mohan','69']\n\n#OUTPUT\n['Question:What is my name? Answer:Mohan','What is your age? Answer:69']\n\nDONT RETURN ANYTHING OTHER THAN THE FINAL ARRAY"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(script)},
    ]

    stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=conversation,
        temperature=0.5,
        max_tokens=4048,
        top_p=1,
        stream=True,
        stop=None,
    )
    return "".join(piece.choices[0].delta.content or "" for piece in stream)
    # print(qapair)
    # print(obs_json_template+bizobj_json_template)
    # print("Question Answer:"+str(qapair)+"\nJSON:\n"+str(obs_json_template+bizobj_json_template))
    # print(str(obs_json_template+bizobj_json_template))
def conflict_detect(json1,json2):
    """Ask the Groq chat model to merge two JSONs and flag conflicting answers.

    Both arguments are stringified and sent in one user message. The system
    prompt tells the model to compare the "User Answer" fields, prefer the
    non-TBD side, and mark real disagreements as CONFLICT with a reason.

    Returns the streamed reply as one string (not parsed).
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    system_prompt = "You are an experienced copywriter who understands engineering concepts. You will be given 2 JSONs. I want you to compare the \"User Answer\" field between the 2 JSONs, and then check for conflicts. Check for any conflicts within numerical values. If you find a conflict, mark the field as CONFLICT and always give the reasoning behind the conflict. However be careful to check if the 2 fields to be compared essentially mean the same thing, in that case return either answer. If either field says TBD, ignore the test and output the test where user answer s not TBD.\nFor example:\nUser Answer 1: \"TBD\"\nUser Answer 2: \"The budget is 15000\"\nResult: \"The budget is 15000\"\n\nUser Answer 1: \"The expected ROI is 5 years\"\nUser Answer 2:\"The ROI is to be within 15 years\"\nResult:\"CONFLICT - The first answer mentions ROI as 5 years while the second answer mentions ROI as 15 years\"\n\nUser Answer 1: \"The KPIs to be measured are speed and accuracy\"\nUser Answer 2: \"TBD\"\nResult: \"The KPIs to be measured are speed and accuracy\"\n\nUser Answer 1: \"There will be 1GB Internet Connection\"\nUser Answer 2: \"There will be EthernetIP present\"\nResult: \"There will be EthernetIP with 1GB internet connection present\"\n\nIn the end only return the filled JSON. DONT RETURN ANYTHING OTHER THAN THE JSON."
    user_message = "JSON 1:\n"+str(json1)+"\nJSON 2:\n"+str(json2)

    stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        temperature=0.24,
        max_tokens=5220,
        top_p=1,
        stream=True,
        stop=None,
    )
    return "".join(piece.choices[0].delta.content or "" for piece in stream)

def answer_refill(qapair,json_template):
    """Ask the Groq chat model to copy answers from *qapair* into a template.

    Both arguments are stringified and sent in one user message; the system
    prompt tells the model to place each answer in the matching "User Answer"
    entry and return the full JSON. Returns the streamed reply as one string.
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    system_prompt = "You are a helpful assistant. You will be given a Question-answer pair. You will be given a json. Some subproperties in the JSONs labelled \"User Answer\" are marked as TBD. Based on the question answer pair, I want you to fill the Answer of the question answer pair as it is into the \"User answer\" subproperty. Check the description of the field against the question and be sure to fill the correct field for the correct question. Make sure you return the full JSON, without missing any field. After filling, merge the two filled JSONs. Then return the final completely filled JSON. DONT OUTPUT ANYTHING OTHER THAN THE JSONS."
    user_message = "Question Answer:"+str(qapair)+"\nJSON:\n"+str(json_template)

    stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        temperature=1,
        max_tokens=8000,
        top_p=1,
        stream=True,
        stop=None,
    )
    return "".join(piece.choices[0].delta.content or "" for piece in stream)

def response_filter(record,key,opt):
    """Ask the Groq chat model to pull the value of *key* out of *record*.

    ``opt == 1`` selects the API key stored in ``GROQ_API_KEY_1``; any other
    value selects ``GROQ_API_KEY``. *key* and *record* are stringified into
    the user message. Returns the streamed reply as one string (not parsed).
    """
    key_variable = "GROQ_API_KEY_1" if opt==1 else "GROQ_API_KEY"
    groq_client = Groq(api_key=os.getenv(key_variable))

    system_prompt = "You are a helpful assistant. You will be given a big JSON. I want to only extract one key out of that. RETURN THE VARIABLE ASKED FOR ONLY"
    user_message = "Var="+str(key)+"\nJSON:\n"+str(record)

    stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        temperature=0.38,
        max_tokens=7830,
        top_p=1,
        stream=True,
        stop=None,
    )
    return "".join(piece.choices[0].delta.content or "" for piece in stream)
def executive_summary_complete(json_template):
    """Ask the Groq chat model for a structured executive summary of a JSON.

    *json_template* is stringified and sent as the user message; the system
    prompt asks for headers/sub-headers built around the "User Answer"
    entries. Returns the streamed reply as one string.
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    system_prompt = "You are a professional copyrighter. You will be given a JSON, I want you to create a complete executive summary with headers and subheaders. It should be a structured document. \"User Answer\" are what are the answers you have to focus on. Dont skip any of the Fields in both JSONs. Use the Description to frame the User answer. DONT OUTPUT ANYTHING OTHER THAN THE SUMMARY."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(json_template)},
    ]

    stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=conversation,
        temperature=0.73,
        max_tokens=5610,
        top_p=1,
        stream=True,
        stop=None,
    )
    return "".join(piece.choices[0].delta.content or "" for piece in stream)

def executive_summary(questions):
    """Ask the Groq chat model to turn *questions* into a follow-up e-mail.

    *questions* is stringified and sent as the user message; the system
    prompt asks the model to drop repeated questions and list the rest as
    bullet points in a professional mail. Returns the streamed reply as one
    string.
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    system_prompt = "You are a helpful assistant. You will be given an array of engineering questions. There maybe some repetitions in the questions. Remove them.\n\nNow create a professional mail with this approach:\nyou thank the recipient for the last meeting. Then after talking with your team you have a series of questions. You then list out the questions in a bullet point fashion."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(questions)},
    ]

    stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=conversation,
        temperature=0.65,
        max_tokens=5220,
        top_p=1,
        stream=True,
        stop=None,
    )
    return "".join(piece.choices[0].delta.content or "" for piece in stream)

def conflict_summary(json):
    """Ask the Groq chat model to write a mail about the CONFLICT entries.

    *json* is stringified and sent as the user message; the system prompt
    asks for one question per conflicting "User Answer", including the reason
    for the conflict. Returns the streamed reply as one string.

    Note: the parameter name hides the ``json`` module inside this function;
    the module is not used here, so that is harmless.
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    system_prompt = "You are a helpful assistant. You will be given a JSON. The fields where \"User Answer\" is marked as \"CONFLICT\", I want you to make questions asking the difference only for these fields. If either User Answer is marked as TBD, DONT CREATE QUESTIONS FOR THAT. Remember to always include the reason for conflict. The final output should be a mail which outlines all the questions. The mail should say how after discussion with the team you have come up with these questions. Maintain a professional tone. Refer to JSON1 as initial communication. Refer to JSON2 as latest communication. Dont mention the words \"JSON1\" and \"JSON2\" anywhere"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(json)},
    ]

    stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=conversation,
        temperature=0.65,
        max_tokens=7220,
        top_p=1,
        stream=True,
        stop=None,
    )
    return "".join(piece.choices[0].delta.content or "" for piece in stream)

def airtable_retrieve(identifier):
    """Look up the Airtable record whose ``Identifier`` field equals *identifier*.

    Args:
        identifier: Value to match against the ``Identifier`` column. It is
            converted to text and escaped before being embedded in the
            ``filterByFormula`` query parameter.

    Returns:
        The ``fields`` mapping of the last record in the response, or the
        string ``"404"`` when nothing matched, the matched record has no
        fields, or the request failed. Failures are printed, never raised.
    """
    base_id = 'appcl0egQeE4pP5ID'
    table_name = 'tblfQBynpcfdDUywV'
    api_key = os.getenv("AIRTABLE_KEY")

    # API endpoint
    base_url = f'https://api.airtable.com/v0/{base_id}/{table_name}'

    # Headers for authentication
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    def get_record_by_identifier(identifier):
        try:
            # The identifier is user-supplied. Escape backslashes and single
            # quotes so it cannot terminate the quoted string and alter the
            # formula (or simply break it, e.g. for "O'Brien").
            escaped = str(identifier).replace("\\", "\\\\").replace("'", "\\'")
            filter_formula = f"{{Identifier}} = '{escaped}'"

            # A timeout keeps a stalled connection from hanging the app; the
            # resulting Timeout is a RequestException and is handled below.
            response = requests.get(
                base_url,
                headers=headers,
                params={'filterByFormula': filter_formula},
                timeout=30
            )
            response.raise_for_status()

            records = response.json()['records']
            if not records:
                raise ValueError(f"No record found for identifier '{identifier}'")
            if len(records) > 1:
                # The message now matches what is actually returned (index -1).
                print(f"Warning: Multiple records found for identifier '{identifier}'. Returning the last one.")
            return records[-1]['fields']

        except requests.exceptions.RequestException as e:
            print(f"An error occurred while making the request: {str(e)}")
            return None
        except ValueError as e:
            print(str(e))
            return None
        except Exception as e:
            print(f"An unexpected error occurred: {str(e)}")
            return None

    record = get_record_by_identifier(identifier)
    # "404" is the sentinel callers compare against for "not found".
    return record if record else "404"

def combine_json_files(directory):
    """Load every ``*.json`` file directly inside *directory* into one dict.

    Args:
        directory: Path of the folder to scan (sub-folders are not visited).

    Returns:
        A dict mapping each file name without its extension to the parsed
        JSON content of that file. Files not ending in ``.json`` are ignored.

    Raises:
        OSError: If the directory or one of the files cannot be read.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    combined_data = {}
    for filename in os.listdir(directory):
        if not filename.endswith('.json'):
            continue
        key = os.path.splitext(filename)[0]  # Use filename without extension as key
        file_path = os.path.join(directory, filename)
        # JSON text is UTF-8; decode it explicitly instead of relying on the
        # platform's default encoding (which is not UTF-8 everywhere).
        with open(file_path, 'r', encoding='utf-8') as file:
            combined_data[key] = json.load(file)
    return combined_data

def prepare_json_string(json_data):
    """Return *json_data* serialized to JSON and then quoted as a JSON string.

    The value is encoded twice: decoding the result once yields the JSON text
    of *json_data*, decoding it a second time yields the data itself.
    """
    serialized = json.dumps(json_data)
    quoted = json.dumps(serialized)
    return quoted

def airtable_write_main(json_strings,id):
    """Create one Airtable record holding the per-section JSON strings.

    Args:
        json_strings: Mapping that may contain the seven ``*_JSON`` keys
            listed below; a missing key is stored as the text ``"[]"``.
        id: Value written to the record's ``Identifier`` field.

    The outcome of the POST is printed; nothing is returned or raised for a
    non-200 status.
    """
    airtable_token = os.getenv("AIRTABLE_KEY")
    base = 'appcl0egQeE4pP5ID'
    table = 'tblfQBynpcfdDUywV'
    endpoint = f'https://api.airtable.com/v0/{base}/{table}'

    request_headers = {
        'Authorization': f'Bearer {airtable_token}',
        'Content-Type': 'application/json'
    }

    # Column order is kept as-is so the serialized payload is unchanged.
    json_columns = (
        "BIZ_OBS_JSON",
        "PROD_VAR_INFO_JSON",
        "CUSTOMER_DEPENDENCY_JSON",
        "MATERIAL_HANDLING_JSON",
        "SOFTWARE_JSON",
        "ACCEPTANCE_JSON",
        "OBS_JSON",
    )
    fields = {"Identifier": id}
    for column in json_columns:
        fields[column] = json_strings.get(column, "[]")

    body = json.dumps({"records": [{"fields": fields}]})
    response = requests.post(endpoint, headers=request_headers, data=body)

    if response.status_code != 200:
        print(f"Failed to add record. Status code: {response.status_code}, Error: {response.text}")
    else:
        print("Record added successfully!")

def chunk_data(data, chunk_size=10):
    """Split *data* into a list of dicts holding at most *chunk_size* items each.

    A dict is split by its key/value pairs. A list is sliced and every slice
    is passed to ``dict()``, so its elements must themselves be key/value
    pairs. Any other type raises ``TypeError``.
    """
    if not isinstance(data, (dict, list)):
        raise TypeError("Data must be either a dictionary or a list")

    pairs = list(data.items()) if isinstance(data, dict) else data

    batches = []
    for start in range(0, len(pairs), chunk_size):
        batches.append(dict(pairs[start:start + chunk_size]))
    return batches

def airtable_write(json_template):
    """Restructure a JSON via the Groq model and upload the rows to Airtable.

    Steps:
      1. *json_template* is sent unchanged as the user message; the model is
         asked for rows with "Category", "Sub-category", "Description" and
         "User Answer".
      2. The reply is parsed with ``json.loads`` (which raises if the reply is
         not valid JSON) and dumped to ``groq_json.json``.
      3. The parsed rows are POSTed to the Airtable table in batches of 10;
         the result of each POST is printed.

    A row lacking one of the four keys raises ``KeyError``.
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    # Groq inference
    restructure_prompt = "You are a helpful assistant. You will be given a unstructured JSON. I want you to convert it into a fully structured JSON which will become a structured CSV. The headings of the CSV are to be \\\"Category\\\",\\\"Sub-category\\\",\\\"Description\\\" and \\\"User Answer\\\". So shuffle around the fields accordingly. \nFields marked \"Category\" are to be directly picked as the \"Category\" for the CSV. If there is \"Observation type\", then that becomes the Category.  \nDONT LEAVE ANY FIELD. MAKE SURE ALL FIELDS ARE INCLUDED IN THE RESULT. DONT OUTPUT ANYTHING OTHER THAN THE JSON. ONLY OUTPUT THE JSON.\n"
    stream = groq_client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {"role": "system", "content": restructure_prompt},
            {"role": "user", "content": json_template},
        ],
        temperature=0.25,
        max_tokens=8000,
        top_p=1,
        stream=True,
        stop=None,
    )
    raw_reply = "".join(piece.choices[0].delta.content or "" for piece in stream)

    # Parse the model's reply and keep a copy on disk.
    rows = json.loads(raw_reply)
    with open("groq_json.json", "w") as dump_file:
        json.dump(rows, dump_file, indent=4)

    airtable_token = os.getenv("AIRTABLE_KEY")
    base = 'appcl0egQeE4pP5ID'
    table = 'tbl2AaOSxyBv6ObR5'
    endpoint = f'https://api.airtable.com/v0/{base}/{table}'
    request_headers = {
        'Authorization': f'Bearer {airtable_token}',
        'Content-Type': 'application/json'
    }

    column_names = ("Category", "Sub-category", "Description", "User Answer")
    batch_size = 10

    # Walk the rows ten at a time; each slice becomes one POST.
    for start in range(0, len(rows), batch_size):
        batch = rows[start:start + batch_size]
        payload = {
            "records": [
                {"fields": {column: row[column] for column in column_names}}
                for row in batch
            ]
        }

        response = requests.post(endpoint, headers=request_headers, data=json.dumps(payload))

        if response.status_code != 200:
            print(f"Failed to add batch. Status code: {response.status_code}, Error: {response.text}")
        else:
            print(f"Batch of {len(batch)} records added successfully!")

def process_new_customer(unique_id):
    """Show the upload prompt for a new customer and process the PDF once given.

    Nothing further happens until the file uploader returns a file; then the
    file and *unique_id* are handed to ``button1``.
    """
    st.write(f"Processing new customer with ID: {unique_id}")
    st.write("Please upload the first communication:")
    first_communication = st.file_uploader("Upload a PDF document", type="pdf")
    if first_communication is None:
        return
    button1(first_communication,unique_id)
    # # Add your document processing steps for new customers here
    # # For example:
    # uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
def button1(uploaded_file,unique_id):
    """Run the full first-communication pipeline for an uploaded PDF.

    In order: extract the PDF text, classify it, fill the observation
    template and the six section templates through the model, generate
    follow-up questions for each, compose the question e-mail and the
    executive summary, display both, and store the seven filled JSON strings
    in Airtable under *unique_id*. Intermediate results are kept in
    ``st.session_state``.
    """
    state = st.session_state

    st.write("Parsing Document ...")
    state.text = extract_text_from_pdf(uploaded_file)
    st.write("Running Classification Algorithm...")
    state.classification_result = classification_LLM(state.text)

    # Observation template: narrowed by the classification, then filled.
    with open('observationsJSON.json', 'r') as template_file:
        obs_json_template = json.load(template_file)
    final_obs_json = obsjsoncreate(obs_json_template, state.classification_result, state.text)
    state.obs = final_obs_json

    # (session_state attribute, template file) in the order they are processed.
    sections = (
        ("bizobj", 'Biz_Obj.json'),
        ("prodvarinfo", 'Prod_var_info.json'),
        ("materialhandling", 'Material_handling.json'),
        ("software", 'software.json'),
        ("customerdependency", 'Customer_dependency.json'),
        ("acceptance", 'acceptance.json'),
    )
    filled = {}
    for attribute, template_path in sections:
        with open(template_path, 'r') as template_file:
            section_template = json.load(template_file)
        filled[attribute] = bizobjjsoncreate(section_template, state.text)

    # Session state is only updated once every section has been filled.
    for attribute, _ in sections:
        setattr(state, attribute, filled[attribute])

    st.write("Creating Questions...")
    question_parts = [question_create(final_obs_json)]
    for attribute, _ in sections:
        question_parts.append(question_create(filled[attribute]))
    totquestions = "".join(question_parts)

    st.write("Creating Question Email...")
    exec_questions = executive_summary(totquestions)

    # The summary input is the plain concatenation of the filled sections,
    # with the observation JSON last.
    final_json = "".join([filled[attribute] for attribute, _ in sections] + [final_obs_json])
    exec_summ = executive_summary_complete(final_json)
    st.write(exec_summ)
    st.write("Here is the Composed Mail for the customers ->")
    st.write(exec_questions)

    json_strings = {
        "BIZ_OBS_JSON": state.bizobj,
        "PROD_VAR_INFO_JSON": state.prodvarinfo,
        "CUSTOMER_DEPENDENCY_JSON": state.customerdependency,
        "MATERIAL_HANDLING_JSON": state.materialhandling,
        "SOFTWARE_JSON": state.software,
        "ACCEPTANCE_JSON": state.acceptance,
        "OBS_JSON": state.obs,
    }
    airtable_write_main(json_strings,unique_id)


def _load_json_template(path):
    """Return the parsed contents of the JSON template file at *path*."""
    # JSON text is UTF-8; being explicit keeps the platform's default locale
    # encoding from mis-decoding non-ASCII characters in the templates.
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)


def process_registered_customer(unique_id):
    """Run the follow-up workflow for a customer that already has a stored record.

    Steps, in order:
      1. Fetch the stored record with ``airtable_retrieve``; the sentinel
         ``'404'`` means no record exists, in which case a message is shown
         and the function returns.
      2. Ask (via ``st.chat_input``) for the questions sent to the customer
         and their answers. Nothing further happens until text is submitted.
      3. Build Q&A pairs with ``qapair_create`` and refill each blank section
         template (loaded from its JSON file) plus the stored ``OBS_JSON``
         with ``answer_refill``.
      4. Compare every stored section against its refilled counterpart with
         ``conflict_detect`` and display the ``conflict_summary`` output,
         followed by the ``executive_summary_complete`` output.
      5. Write the refilled sections back with ``airtable_write_main``.

    Args:
        unique_id: Identifier used both to retrieve and to write the record.

    Returns:
        None. All results are rendered through Streamlit or written to Airtable.

    Raises:
        OSError / json.JSONDecodeError: if a template file is missing or is
            not valid JSON (propagated from ``_load_json_template``).
    """
    st.write(f"Processing registered customer with ID: {unique_id}")
    record = airtable_retrieve(unique_id)
    if record == '404':
        st.write("Record Not Found. Please restart and check identifier")
        return

    # Previously stored sections; a missing field yields None.
    obs_json = record.get("OBS_JSON")
    biz_obs_json = record.get("BIZ_OBS_JSON")
    software_json = record.get("SOFTWARE_JSON")
    customer_dependency_json = record.get("CUSTOMER_DEPENDENCY_JSON")
    prod_var_info_json = record.get("PROD_VAR_INFO_JSON")
    material_handling_json = record.get("MATERIAL_HANDLING_JSON")
    acceptance_json = record.get("ACCEPTANCE_JSON")

    st.write("Records Retrieved. Please enter the Questions and Answers:")
    qa = st.chat_input("Please enter the questions given to customer and their answers:")
    if not qa:
        # No answers submitted yet on this run.
        return

    qapair = qapair_create(qa)

    # Each section is refilled from a blank template rather than from the
    # stored copy, so the result reflects only the answers supplied now.
    filled_bizobj = answer_refill(qapair, _load_json_template('Biz_Obj.json'))
    filled_prodvar = answer_refill(qapair, _load_json_template('Prod_var_info.json'))
    filled_material_handling = answer_refill(qapair, _load_json_template('Material_handling.json'))
    filled_software = answer_refill(qapair, _load_json_template('software.json'))
    filled_customer_dependency = answer_refill(qapair, _load_json_template('Customer_dependency.json'))
    filled_acceptance = answer_refill(qapair, _load_json_template('acceptance.json'))
    # OBS is the exception: it is refilled on top of the stored value.
    filled_obs = answer_refill(qapair, obs_json)

    st.write("Checking for Conflicts...")
    conquest_bizobj = conflict_detect(biz_obs_json, filled_bizobj)
    conquest_prodvar = conflict_detect(prod_var_info_json, filled_prodvar)
    conquest_material_handling = conflict_detect(material_handling_json, filled_material_handling)
    conquest_software = conflict_detect(software_json, filled_software)
    conquest_customer_dependency = conflict_detect(customer_dependency_json, filled_customer_dependency)
    conquest_acceptance = conflict_detect(acceptance_json, filled_acceptance)
    conquest_obs = conflict_detect(obs_json, filled_obs)

    st.write("Executive summary ....")
    st.write("Creating Question Email...")

    # NOTE(review): the helper outputs are combined with "+", so they are
    # presumably all strings (or all lists) - confirm against the helpers.
    totquestions = (
        conquest_obs
        + conquest_bizobj
        + conquest_prodvar
        + conquest_material_handling
        + conquest_software
        + conquest_customer_dependency
        + conquest_acceptance
    )
    final_json = (
        filled_bizobj
        + filled_prodvar
        + filled_material_handling
        + filled_software
        + filled_customer_dependency
        + filled_acceptance
        + filled_obs
    )

    conflict_report = conflict_summary(totquestions)
    st.write(conflict_report)
    overall_summary = executive_summary_complete(final_json)
    st.write(overall_summary)

    # Persist the refilled sections under the same field names they were read from.
    json_strings = {
        "BIZ_OBS_JSON": filled_bizobj,
        "PROD_VAR_INFO_JSON": filled_prodvar,
        "CUSTOMER_DEPENDENCY_JSON": filled_customer_dependency,
        "MATERIAL_HANDLING_JSON": filled_material_handling,
        "SOFTWARE_JSON": filled_software,
        "ACCEPTANCE_JSON": filled_acceptance,
        "OBS_JSON": filled_obs,
    }
    airtable_write_main(json_strings, unique_id)



def main():
    """Render the chatbot page and drive the customer workflow.

    Two buttons select the "new" or "registered" workflow. The chosen
    workflow and the current step ("id_input" or "processing") are kept in
    ``st.session_state``. In the "id_input" step the user supplies a unique
    identifier; once given, the step becomes "processing", the script is
    rerun, and the matching ``process_*_customer`` function is called with
    that identifier.
    """
    st.title("Qualitas Sales Chatbot")

    left_col, right_col = st.columns(2)
    with left_col:
        new_customer = st.button("New Customer")
    with right_col:
        registered_customer = st.button("Registered Customer")

    # A click on either button (re)starts the flow at the identifier prompt;
    # "New Customer" takes precedence if both were somehow set.
    selected = "new" if new_customer else ("registered" if registered_customer else None)
    if selected is not None:
        st.session_state.workflow = selected
        st.session_state.step = "id_input"

    # Nothing to do until a workflow has been chosen.
    if "workflow" not in st.session_state:
        return

    if st.session_state.step == "id_input":
        st.write("Please Enter a Unique Identifier:")
        entered_id = st.chat_input("Please enter a unique identifier:")
        if entered_id:
            st.session_state.unique_id = entered_id
            st.session_state.step = "processing"
            st.rerun()

    if st.session_state.step == "processing":
        handler = (
            process_new_customer
            if st.session_state.workflow == "new"
            else process_registered_customer
        )
        handler(st.session_state.unique_id)

# Entry point: start the app only when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()